In [1]:
%matplotlib inline

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models
import torchvision
import math
from torch.autograd import Variable
import copy
import torchvision.transforms as transforms

In [3]:
import pandas as pd 
import numpy as np

In [4]:
rating_dir = './RCdata/rating_final.csv'
rest_deets_dir = './RCdata/geoplaces2.csv'
rest_cuisine_dir = './RCdata/chefmozcuisine.csv'

In [9]:
ratings = pd.read_csv(rating_dir)
rest_deets = pd.read_csv(rest_deets_dir, encoding = 'cp1252')
rest_cuisine = pd.read_csv(rest_cuisine_dir, encoding = 'cp1252')

In [10]:
ratings.rating.value_counts(normalize  = True)

2    0.418605
1    0.362618
0    0.218777
Name: rating, dtype: float64

In [11]:
ratings.rating.mean()

1.1998277347114557

In [12]:
ratings.rating.median()

1.0

In [13]:
from sklearn.model_selection import train_test_split
rating_train, rating_test = train_test_split(ratings, test_size=0.3)

In [14]:
n_users = len(ratings.userID.unique())
n_items = len(ratings.placeID.unique())
print (n_users)
print (n_items)

138
130


In [15]:
user_list = np.array(ratings.userID.unique())
item_list = np.array(ratings.placeID.unique())

In [16]:
from scipy.sparse import rand as sprand
from scipy.sparse import lil_matrix

In [18]:
#original ratings
interactions_full_orig = lil_matrix((n_users, n_items), dtype = float)
for row in ratings.itertuples():
    user, = np.where(user_list == row[1])
    item, = np.where(item_list == row[2])
    interactions_full_orig[user[0], item[0]] = row[3]   

In [19]:
#food + service
interactions_full = lil_matrix((n_users, n_items), dtype = float)
for row in ratings.itertuples():
    user, = np.where(user_list == row[1])
    item, = np.where(item_list == row[2])
    interactions_full[user[0], item[0]] = row[4] + row[5] 

In [20]:
#original ratings
interactions_train_orig = lil_matrix((n_users, n_items), dtype = float)
for row in rating_train.itertuples():
    user, = np.where(user_list == row[1])
    item, = np.where(item_list == row[2])
    interactions_train_orig[user[0], item[0]] = row[3]

In [21]:
#food + service
interactions_train = lil_matrix((n_users, n_items), dtype = float)
for row in rating_train.itertuples():
    user, = np.where(user_list == row[1])
    item, = np.where(item_list == row[2])
    interactions_train[user[0], item[0]] = row[4] + row[5]    

In [24]:
#original ratings
interactions_test_orig = lil_matrix((n_users, n_items), dtype = float)
for row in rating_test.itertuples():
    user, = np.where(user_list == row[1])
    item, = np.where(item_list == row[2])
    interactions_test_orig[user[0], item[0]] = row[3]

In [25]:
#food + service
interactions_test = lil_matrix((n_users, n_items), dtype = float)
for row in rating_test.itertuples():
    user, = np.where(user_list == row[1])
    item, = np.where(item_list == row[2])
    interactions_test[user[0], item[0]] = row[4] + row[5] 

In [26]:
class MatrixFactorization(torch.nn.Module):
    
    def __init__(self, n_users, n_items, n_factors=5):
        super().__init__()
        self.user_factors = torch.nn.Embedding(n_users,
                                               n_factors,
                                               sparse=False)
        self.item_factors = torch.nn.Embedding(n_items,n_factors,sparse=False)
                                               

    # For convenience when we want to predict a sinble user-item pair.
    def predict(self, user, item):
        # Need to fit bias factors
        return (self.user_factors(user) * self.item_factors(item)).sum(1)
    
    # Much more efficient batch operator. This should be used for training purposes
    
    def forward(self, users, items):
        #return (self.user_factors(user) * self.item_factors(item)).sum(1)
    
        return torch.mm(self.user_factors(users),torch.transpose(self.item_factors(items),0,1))

In [27]:
class BiasedMatrixFactorization(torch.nn.Module):
    
    def __init__(self, n_users, n_items,mean, n_factors=5):
        super().__init__()
        self.user_factors = torch.nn.Embedding(n_users,
                                               n_factors,
                                               sparse=False)
        self.item_factors = torch.nn.Embedding(n_items, n_factors,sparse=False)
        self.user_biases = torch.nn.Embedding(n_users, 1, sparse = False)
        
        self.item_biases = torch.nn.Embedding(n_items,1, sparse = False)
                                               
        self.mu = mean
    
    #def forward(self, user, item):
        #pred = self.user_biases(user) + self.item_biases(item)
        #pred += (self.user_factors(user) * self.item_factors(item)).sum(1)
        #return pred
        
    def predict(self, user, item):
        pred = self.mu + self.user_biases(user) + self.item_biases(item)
        orig = (self.user_factors(user) * self.item_factors(item)).sum(1)
        final = pred + orig
        return final
    
    def forward(self, users, items):
        
        item_means = []
        user_means = []
        for i in range(len(users)):
            item_means.append(torch.t(self.item_biases(items)))
            
        for j in range(len(items)):
            user_means.append(torch.t(self.user_biases(users)))
        
        it_means = torch.cat(item_means, 0)
        us_means = torch.cat(user_means, 0)
        us_means = torch.t(us_means)
        
        orig = torch.mm(self.user_factors(users),torch.transpose(self.item_factors(items),0,1))
        
        y = torch.add(orig, it_means)
        
        f = torch.add(y,us_means)
        
        result = torch.add(f,self.mu)
        
        return result

In [28]:
def get_batch(batch_size,ratings):
    # Sort our data and scramble it
    rows, cols = ratings.shape
    p = np.random.permutation(rows)
    
    # create batches
    sindex = 0
    eindex = batch_size
    while eindex < rows:
        batch = p[sindex:eindex]
        temp = eindex
        eindex = eindex + batch_size
        sindex = temp
        yield batch

    if eindex >= rows:
        batch = range(sindex,rows)
        yield batch

In [29]:
def load_model(model,load_path):
    load_dict = torch.load(load_path)
    val_loss = load_dict['val_loss']
    model.load_state_dict(load_dict['model_state_dict'])


def checkpoint_model(val_loss, model,save_path):
    save_dict = dict(
                     val_loss=val_loss,
                     model_state_dict=model.state_dict())
                     #opt_state_dict=predictions.state_dict())
    torch.save(save_dict, save_path)

In [39]:
def run_validation(model, test_ratings, BATCH_SIZE, load_path):
    load_model(model, load_path)
    running_loss = 0.0
    loss_func = torch.nn.MSELoss()
    accur = 0
    for i,batch in enumerate(get_batch(BATCH_SIZE, test_ratings)):
        
        interactions = Variable(torch.FloatTensor(test_ratings[batch, :].toarray()))
        rows = Variable(torch.LongTensor(batch))
        cols = Variable(torch.LongTensor(np.arange(test_ratings.shape[1])))
        
        predictions = model(rows, cols)
        loss = loss_func(predictions, interactions)
        
        running_loss += np.sqrt(loss.data[0])*BATCH_SIZE
        
        correct = get_accuracy_test(test_ratings, model, rows)
        accur = accur+ correct
    
    epoch_loss = running_loss/test_ratings.shape[0]
    return epoch_loss,accur

In [40]:
def run_validation_test(model, test_ratings, test_ratings_orig, BATCH_SIZE, load_path):
    load_model(model, load_path)
    running_loss = 0.0
    loss_func = torch.nn.MSELoss()
    accur = 0
    for i,batch in enumerate(get_batch(BATCH_SIZE, test_ratings)):
        
        interactions = Variable(torch.FloatTensor(test_ratings[batch, :].toarray()))
        rows = Variable(torch.LongTensor(batch))
        cols = Variable(torch.LongTensor(np.arange(test_ratings.shape[1])))
        
        predictions = model(rows, cols)
        loss = loss_func(predictions, interactions)
        
        running_loss += np.sqrt(loss.data[0])*BATCH_SIZE
        
        correct = get_accuracy_test(test_ratings_orig, model, rows)
        accur = accur+ correct
    
    epoch_loss = running_loss/test_ratings.shape[0]
    return epoch_loss,accur

In [34]:
def get_accuracy_test(interactions_orig,model,rows):
    
    checks = np.nonzero(interactions_orig)
    check_these = checks[0]
    
    batch = range(interactions_train.shape[0])
    interactions = Variable(torch.FloatTensor(interactions_train[batch,:].toarray()))
    users = Variable(torch.LongTensor(batch))
    cols = Variable(torch.LongTensor(np.arange(interactions_train.shape[1])))
    predictions = model(users, cols)
    
    x = predictions.data.numpy()
    
    correct = 0
    
    for u in range(len(rows)):
        user = rows.data[u]
        where = np.where(check_these==user)
        index = where[0]
        if len(index) > 0:
            for c in range(len(index)):
                rest = checks[1][index[c]]
                prediction = predictions[u][rest]
                   
                val = prediction.data[0]
                
                if val <= np.percentile(x,22):
                    this_val = 0.0
                elif np.percentile(x,22) < val < np.percentile(x, 58):
                    this_val = 1.0
                elif val >= np.percentile(x, 58):
                     this_val= 2.0
        
                
                if this_val == interactions_orig[user,rest]:
                    correct = correct + 1 
            else:
                next
    return correct

In [59]:
def generate_predictions(model, interactions_full, load_path):
    load_model(model, load_path)

    batch = range(interactions_full.shape[0])
    interactions = Variable(torch.FloatTensor(interactions_full[batch,:].toarray()))
    rows = Variable(torch.LongTensor(batch))
    cols = Variable(torch.LongTensor(np.arange(interactions_full.shape[1])))
    predictions = model(rows, cols)
    
    predictions[interactions!=0] = 0
        
    val, index = torch.sort(predictions,1, descending = True)
        
    top5 = index[:,:5]
        
    return top5

In [60]:
def give_top5_cuisines(recs, name_list, cuisine_list, user_list, rest_list):
    your_rests = [[0 for j in range(11)] for u in range(len(user_list))]
    for i in range(len(user_list)):
        user, = np.where(user_list == user_list[i])
        your_rests[i][0] = user_list[i]
        count = 1
        for r in range(5):
            this_rest = recs[user[0]][r].data[0]
            rest = rest_list[this_rest]
            x = name_list.loc[name_list['placeID'] == rest]
            name = x['name'].item()
            
            your_rests[i][count] = name
            count = count +1
            
            y = cuisine_list.loc[cuisine_list['placeID'] == rest]['Rcuisine']
            
            cuisine_types = ''
            for m in y:
                cuisine_types = cuisine_types + m + ', '
            
            your_rests[i][count] = cuisine_types
            count = count + 1
            
    return your_rests

In [73]:
batch_size = 10
save_path = './project_models/model.ckpt'
save_path_val = './project_models/modelv.ckpt'
factors = [1,2,3,4,5]
decays = [0.1,0.01,0.001]
LR = [1e-6, 1e-5, 1e-4, 1e-3]

In [80]:
col_names = ['User','Rec1', 'Rec1_Cuisines', 'Rec2', 'Rec2_Cuisines', 'Rec3', 'Rec3_Cuisines', 'Rec4','Rec4_Cuisines',  'Rec5', 'Rec5_Cuisines']

In [81]:
checks = np.nonzero(interactions_train_orig)
checks_v = np.nonzero(interactions_test_orig)

In [77]:
best_val_loss = 1000.0
for f in range(2):
    print ("FACTOR: " + str(factors[f]))
    for r in range(len(decays)):
        print ("Decay: " + str(decays[r]))
        for l in range(len(LR)):
            print ('LR: '+ str(LR[l]))
            best_train_loss = 1000.0
            accur = 0
            model = MatrixFactorization(interactions_train_orig.shape[0], interactions_train_orig.shape[1], n_factors=factors[f])
            loss_func = torch.nn.MSELoss()
            reg_loss_func = torch.optim.Adam(model.parameters(), lr=LR[l], weight_decay=decays[r])
            for i in range(5):
                print ("Epoch: " + str(i))
                running_loss = 0.0
                accur = 0
                for m, batch in enumerate(get_batch(batch_size, interactions_train_orig)):
                    reg_loss_func.zero_grad()
                    interactions = Variable(torch.FloatTensor(interactions_train_orig[batch,:].toarray()))
                    rows = Variable(torch.LongTensor(batch))
                    cols = Variable(torch.LongTensor(np.arange(interactions_train_orig.shape[1])))
                    predictions = model(rows, cols)
        
                    loss = loss_func(predictions, interactions)
                    running_loss += np.sqrt(loss.data[0])*batch_size
                
                    correct = get_accuracy_test(interactions_train_orig, model, rows)
                    accur = accur + correct
                
                    loss.backward()
                reg_loss_func.step()
    
                epoch_loss = running_loss/interactions_train.shape[0]
                print ('train_loss: ' + str(epoch_loss))
            
                print ('accuracy:' + str(accur/len(checks[0])))
            
                if epoch_loss < best_train_loss:
                    best_train_loss = epoch_loss
                    print ('checkpoint model with train loss: ' + str(best_train_loss))
                    checkpoint_model(epoch_loss, model, save_path)
        
                val_loss, val_accuracy = run_validation(model,interactions_test_orig, batch_size, save_path)
                print ('val loss: ' + str(val_loss))
                print ('val accuracy: ' + str(val_accuracy/len(checks_v[0])))
        
                if val_loss < best_val_loss:
                    best_val_loss = val_loss
                    print ('checkpointing_model with val loss: ' + str(best_val_loss))
                    checkpoint_model(val_loss, model, save_path_val)

FACTOR: 1
Decay: 0.1
LR: 1e-06
Epoch: 0
train_loss: 1.0226509285504506
accuracy:0.4296998420221169
checkpoint model with train loss: 1.0226509285504506
val loss: 0.996385376608017
val accuracy: 0.39416058394160586
checkpointing_model with val loss: 0.996385376608017
Epoch: 1
train_loss: 1.0243916054097855
accuracy:0.42338072669826227
val loss: 0.9899905426276836
val accuracy: 0.4306569343065693
checkpointing_model with val loss: 0.9899905426276836
Epoch: 2
train_loss: 1.0190996304070026
accuracy:0.42022116903633494
checkpoint model with train loss: 1.0190996304070026
val loss: 0.9813039695970787
val accuracy: 0.4489051094890511
checkpointing_model with val loss: 0.9813039695970787
Epoch: 3
train_loss: 1.005821277036003
accuracy:0.3886255924170616
checkpoint model with train loss: 1.005821277036003
val loss: 0.9740390535003287
val accuracy: 0.43795620437956206
checkpointing_model with val loss: 0.9740390535003287
Epoch: 4
train_loss: 0.9917050505328091
accuracy:0.4075829383886256
checkp

train_loss: 0.9692699267733649
accuracy:0.3522906793048973
checkpoint model with train loss: 0.9692699267733649
val loss: 0.9529351517846869
val accuracy: 0.354014598540146
LR: 0.0001
Epoch: 0
train_loss: 0.9438673108562076
accuracy:0.41232227488151657
checkpoint model with train loss: 0.9438673108562076
val loss: 0.9129233819841982
val accuracy: 0.3795620437956204
checkpointing_model with val loss: 0.9129233819841982
Epoch: 1
train_loss: 0.9368660987937181
accuracy:0.43285939968404424
checkpoint model with train loss: 0.9368660987937181
val loss: 0.9025621906116507
val accuracy: 0.4562043795620438
checkpointing_model with val loss: 0.9025621906116507
Epoch: 2
train_loss: 0.9368893003337428
accuracy:0.3807266982622433
val loss: 0.8844511559037546
val accuracy: 0.3905109489051095
checkpointing_model with val loss: 0.8844511559037546
Epoch: 3
train_loss: 0.9303438163202513
accuracy:0.43285939968404424
checkpoint model with train loss: 0.9303438163202513
val loss: 0.9222558322470366
val a

train_loss: 1.5196340588976762
accuracy:0.3807266982622433
val loss: 1.5056769456423449
val accuracy: 0.3905109489051095
Decay: 0.001
LR: 1e-06
Epoch: 0
train_loss: 1.2812456016145823
accuracy:0.3333333333333333
checkpoint model with train loss: 1.2812456016145823
val loss: 1.2616688984447173
val accuracy: 0.3722627737226277
Epoch: 1
train_loss: 1.2888797565301888
accuracy:0.37756714060031593
val loss: 1.2748944914176634
val accuracy: 0.38321167883211676
Epoch: 2
train_loss: 1.2939580896329295
accuracy:0.353870458135861
val loss: 1.279111824944
val accuracy: 0.3284671532846715
Epoch: 3
train_loss: 1.2768017992586838
accuracy:0.36334913112164297
checkpoint model with train loss: 1.2768017992586838
val loss: 1.244476463350253
val accuracy: 0.3978102189781022
Epoch: 4
train_loss: 1.2972955113073992
accuracy:0.353870458135861
val loss: 1.2633102486920584
val accuracy: 0.35766423357664234
LR: 1e-05
Epoch: 0
train_loss: 1.5659259157495824
accuracy:0.353870458135861
checkpoint model with trai

In [147]:
#original ratings
best_val_loss = 1000.0
best_train_loss = 1000.0
accur = 0
model = MatrixFactorization(interactions_train_orig.shape[0], interactions_train_orig.shape[1], n_factors=1)
loss_func = torch.nn.MSELoss()
reg_loss_func = torch.optim.Adam(model.parameters(), lr=1e-4, weight_decay=0.001)
for i in range(5):
    print ("Epoch: " + str(i))
    running_loss = 0.0
    accur = 0
    #best_train_loss = 1000.0
    for m, batch in enumerate(get_batch(batch_size, interactions_train_orig)):
        reg_loss_func.zero_grad()
        interactions = Variable(torch.FloatTensor(interactions_train_orig[batch,:].toarray()))
        rows = Variable(torch.LongTensor(batch))
        cols = Variable(torch.LongTensor(np.arange(interactions_train_orig.shape[1])))
        predictions = model(rows, cols)
        
        loss = loss_func(predictions, interactions)
        running_loss += np.sqrt(loss.data[0])*batch_size
                
        correct = get_accuracy_test(interactions_train_orig, model, rows)
        accur = accur + correct
                
        loss.backward()
    reg_loss_func.step()
    
    epoch_loss = running_loss/interactions_train_orig.shape[0]
    print ('train_loss: ' + str(epoch_loss))
            
    print ('accuracy:' + str(accur/len(checks[0])))
            
    if epoch_loss < best_train_loss:
        best_train_loss = epoch_loss
        print ('checkpoint model with train loss: ' + str(best_train_loss))
        checkpoint_model(epoch_loss, model, save_path)
        
    val_loss, val_accuracy = run_validation(model,interactions_test_orig, batch_size, save_path)
    print ('val loss: ' + str(val_loss))
    print ('val accuracy: ' + str(val_accuracy/len(checks_v[0])))
        
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        print ('checkpointing_model with val loss: ' + str(best_val_loss))
        checkpoint_model(val_loss, model, save_path_val)

Epoch: 0
train_loss: 1.0285278567167118
accuracy:0.41390205371248023
checkpoint model with train loss: 1.0285278567167118
val loss: 0.9964574534905863
val accuracy: 0.38321167883211676
checkpointing_model with val loss: 0.9964574534905863
Epoch: 1
train_loss: 1.0160876932271727
accuracy:0.3696682464454976
checkpoint model with train loss: 1.0160876932271727
val loss: 0.9781802471408063
val accuracy: 0.41605839416058393
checkpointing_model with val loss: 0.9781802471408063
Epoch: 2
train_loss: 1.0154566482596095
accuracy:0.39968404423380727
checkpoint model with train loss: 1.0154566482596095
val loss: 0.9624081673352644
val accuracy: 0.40875912408759124
checkpointing_model with val loss: 0.9624081673352644
Epoch: 3
train_loss: 1.0103970298649618
accuracy:0.4597156398104265
checkpoint model with train loss: 1.0103970298649618
val loss: 0.9666788482509628
val accuracy: 0.35766423357664234
Epoch: 4
train_loss: 0.9990417596100846
accuracy:0.3665086887835703
checkpoint model with train loss

In [148]:
recs = generate_predictions(model, interactions_full_orig, save_path_val)
results = give_top5_cuisines(recs, rest_deets, rest_cuisine, user_list, item_list)

In [149]:
df = pd.DataFrame(results,columns = col_names)

In [154]:
len(df['Rec5'].value_counts().index.tolist())

7

In [69]:
checks = np.nonzero(interactions_train)
checks_v = np.nonzero(interactions_test)

In [78]:
best_val_loss = 1000.0
for f in range(2):
    print ("FACTOR: " + str(factors[f]))
    for r in range(len(decays)):
        print ("Decay: " + str(decays[r]))
        for l in range(len(LR)):
            print ('LR: '+ str(LR[l]))
            best_train_loss = 1000.0
            accur = 0
            model = MatrixFactorization(interactions_train.shape[0], interactions_train.shape[1], n_factors=factors[f])
            loss_func = torch.nn.MSELoss()
            reg_loss_func = torch.optim.Adam(model.parameters(), lr=LR[l], weight_decay=decays[r])
            for i in range(5):
                print ("Epoch: " + str(i))
                running_loss = 0.0
                accur = 0
                for m, batch in enumerate(get_batch(batch_size, interactions_train)):
                    reg_loss_func.zero_grad()
                    interactions = Variable(torch.FloatTensor(interactions_train[batch,:].toarray()))
                    rows = Variable(torch.LongTensor(batch))
                    cols = Variable(torch.LongTensor(np.arange(interactions_train.shape[1])))
                    predictions = model(rows, cols)
        
                    loss = loss_func(predictions, interactions)
                    running_loss += np.sqrt(loss.data[0])*batch_size
                
                    correct = get_accuracy_test(interactions_train_orig, model, rows)
                    accur = accur + correct
                
                    loss.backward()
                reg_loss_func.step()
    
                epoch_loss = running_loss/interactions_train.shape[0]
                print ('train_loss: ' + str(epoch_loss))
            
                print ('accuracy:' + str(accur/len(checks[0])))
            
                if epoch_loss < best_train_loss:
                    best_train_loss = epoch_loss
                    print ('checkpoint model with train loss: ' + str(best_train_loss))
                    checkpoint_model(epoch_loss, model, save_path)
        
                val_loss, val_accuracy = run_validation_test(model, interactions_test, interactions_test_orig, batch_size, save_path)
                print ('val loss: ' + str(val_loss))
                print ('val accuracy: ' + str(val_accuracy/len(checks_v[0])))
        
                if val_loss < best_val_loss:
                    best_val_loss = val_loss
                    print ('checkpointing_model with val loss: ' + str(best_val_loss))
                    checkpoint_model(val_loss, model, save_path_val)

FACTOR: 1
Decay: 0.1
LR: 1e-06
Epoch: 0
train_loss: 1.08315518156869
accuracy:0.3412322274881517
checkpoint model with train loss: 1.08315518156869
val loss: 1.0229756586841325
val accuracy: 0.36496350364963503
checkpointing_model with val loss: 1.0229756586841325
Epoch: 1
train_loss: 1.0565392632364436
accuracy:0.3886255924170616
checkpoint model with train loss: 1.0565392632364436
val loss: 1.009177799872288
val accuracy: 0.35036496350364965
checkpointing_model with val loss: 1.009177799872288
Epoch: 2
train_loss: 1.0841222107282555
accuracy:0.37756714060031593
val loss: 0.9965144403304358
val accuracy: 0.3175182481751825
checkpointing_model with val loss: 0.9965144403304358
Epoch: 3
train_loss: 1.105832631430245
accuracy:0.358609794628752
val loss: 0.9870537844776622
val accuracy: 0.3686131386861314
checkpointing_model with val loss: 0.9870537844776622
Epoch: 4
train_loss: 1.1206031319859466
accuracy:0.36334913112164297
val loss: 1.0260314753447468
val accuracy: 0.3357664233576642
L

train_loss: 1.1353701169204686
accuracy:0.3902053712480253
checkpoint model with train loss: 1.1353701169204686
val loss: 1.0561702339548034
val accuracy: 0.38686131386861317
Epoch: 1
train_loss: 1.1654598965592258
accuracy:0.37598736176935227
val loss: 1.055928473366727
val accuracy: 0.36496350364963503
Epoch: 2
train_loss: 1.1174919820584417
accuracy:0.382306477093207
checkpoint model with train loss: 1.1174919820584417
val loss: 1.0667070614159786
val accuracy: 0.3905109489051095
Epoch: 3
train_loss: 1.1469677344580302
accuracy:0.37756714060031593
val loss: 1.0668001248214924
val accuracy: 0.3795620437956204
Epoch: 4
train_loss: 1.1530181180730594
accuracy:0.4107424960505529
val loss: 1.06752052851759
val accuracy: 0.3905109489051095
LR: 0.001
Epoch: 0
train_loss: 1.166989141189565
accuracy:0.40442338072669826
checkpoint model with train loss: 1.166989141189565
val loss: 1.0511026133571009
val accuracy: 0.3686131386861314
Epoch: 1
train_loss: 1.174422166407258
accuracy:0.40758293838

val loss: 1.242347961654185
val accuracy: 0.3722627737226277
Epoch: 4
train_loss: 1.3369192283513016
accuracy:0.3886255924170616
val loss: 1.2525773275429481
val accuracy: 0.3795620437956204
LR: 1e-05
Epoch: 0
train_loss: 1.4658081307357327
accuracy:0.36018957345971564
checkpoint model with train loss: 1.4658081307357327
val loss: 1.4097932533440825
val accuracy: 0.3467153284671533
Epoch: 1
train_loss: 1.488543567774597
accuracy:0.37598736176935227
val loss: 1.404421702164653
val accuracy: 0.32116788321167883
Epoch: 2
train_loss: 1.4593553245109887
accuracy:0.3712480252764613
checkpoint model with train loss: 1.4593553245109887
val loss: 1.3680348216570186
val accuracy: 0.354014598540146
Epoch: 3
train_loss: 1.4725999953517657
accuracy:0.35545023696682465
val loss: 1.3889737203366954
val accuracy: 0.3722627737226277
Epoch: 4
train_loss: 1.4684871189462345
accuracy:0.3412322274881517
val loss: 1.3619742234898504
val accuracy: 0.3613138686131387
LR: 0.0001
Epoch: 0
train_loss: 1.50589805

In [163]:
#food + service ratings
best_val_loss = 1000.0
best_train_loss = 1000.0
accur = 0
model = MatrixFactorization(interactions_train.shape[0], interactions_train.shape[1], n_factors=2)
loss_func = torch.nn.MSELoss()
reg_loss_func = torch.optim.Adam(model.parameters(), lr=1e-6, weight_decay=0.001)
for i in range(5):
    print ("Epoch: " + str(i))
    running_loss = 0.0
    accur = 0
    for m, batch in enumerate(get_batch(batch_size, interactions_train)):
        reg_loss_func.zero_grad()
        interactions = Variable(torch.FloatTensor(interactions_train[batch,:].toarray()))
        rows = Variable(torch.LongTensor(batch))
        cols = Variable(torch.LongTensor(np.arange(interactions_train.shape[1])))
        predictions = model(rows, cols)
        
        loss = loss_func(predictions, interactions)
        running_loss += np.sqrt(loss.data[0])*batch_size
                
        correct = get_accuracy_test(interactions_train_orig, model, rows)
        accur = accur + correct
                
        loss.backward()
    reg_loss_func.step()
    
    epoch_loss = running_loss/interactions_train.shape[0]
    print ('train_loss: ' + str(epoch_loss))
            
    print ('accuracy:' + str(accur/len(checks[0])))
            
    if epoch_loss < best_train_loss:
        best_train_loss = epoch_loss
        print ('checkpoint model with train loss: ' + str(best_train_loss))
        checkpoint_model(epoch_loss, model, save_path)
        
    val_loss, val_accuracy = run_validation_test(model, interactions_test, interactions_test_orig, batch_size, save_path)
    print ('val loss: ' + str(val_loss))
    print ('val accuracy: ' + str(val_accuracy/len(checks_v[0])))
        
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        print ('checkpointing_model with val loss: ' + str(best_val_loss))
        checkpoint_model(val_loss, model, save_path_val)

Epoch: 0
train_loss: 1.567506105364824
accuracy:0.3459715639810427
checkpoint model with train loss: 1.567506105364824
val loss: 1.5124519146743196
val accuracy: 0.3759124087591241
checkpointing_model with val loss: 1.5124519146743196
Epoch: 1
train_loss: 1.5565394030095532
accuracy:0.4028436018957346
checkpoint model with train loss: 1.5565394030095532
val loss: 1.4906398780046017
val accuracy: 0.33211678832116787
checkpointing_model with val loss: 1.4906398780046017
Epoch: 2
train_loss: 1.5552617994971292
accuracy:0.3696682464454976
checkpoint model with train loss: 1.5552617994971292
val loss: 1.485228029984551
val accuracy: 0.3795620437956204
checkpointing_model with val loss: 1.485228029984551
Epoch: 3
train_loss: 1.531705219616669
accuracy:0.39652448657187994
checkpoint model with train loss: 1.531705219616669
val loss: 1.5070665491893493
val accuracy: 0.3357664233576642
Epoch: 4
train_loss: 1.5577357901601183
accuracy:0.3744075829383886
val loss: 1.521157226715985
val accuracy: 

In [164]:
recs = generate_predictions(model, interactions_full_orig, save_path_val)
results = give_top5_cuisines(recs, rest_deets, rest_cuisine, user_list, item_list)

In [165]:
df = pd.DataFrame(results,columns = col_names)

In [171]:
len(df['Rec5'].value_counts().index.tolist())

31

In [101]:
save_path_b = './project_models/bmodel.ckpt'
save_path_val_b = './project_models/bmodelv.ckpt'

In [173]:
mean = np.mean(interactions_train_orig)

In [103]:
best_val_loss = 1000.0
for f in range(2):
    print ("FACTOR: " + str(factors[f]))
    for r in range(len(decays)):
        print ("Decay: " + str(decays[r]))
        for l in range(len(LR)):
            print ('LR: '+ str(LR[l]))
            best_train_loss = 1000.0
            accur = 0
            model = BiasedMatrixFactorization(interactions_train_orig.shape[0], interactions_train_orig.shape[1], mean,n_factors=factors[f])
            loss_func = torch.nn.MSELoss()
            reg_loss_func = torch.optim.Adam(model.parameters(), lr=LR[l], weight_decay=decays[r])
            for i in range(5):
                print ("Epoch: " + str(i))
                running_loss = 0.0
                accur = 0
                for m, batch in enumerate(get_batch(batch_size, interactions_train_orig)):
                    reg_loss_func.zero_grad()
                    interactions = Variable(torch.FloatTensor(interactions_train_orig[batch,:].toarray()))
                    rows = Variable(torch.LongTensor(batch))
                    cols = Variable(torch.LongTensor(np.arange(interactions_train_orig.shape[1])))
                    predictions = model(rows, cols)
        
                    loss = loss_func(predictions, interactions)
                    running_loss += np.sqrt(loss.data[0])*batch_size
                
                    correct = get_accuracy_test(interactions_train_orig, model, rows)
                    accur = accur + correct
                
                    loss.backward()
                reg_loss_func.step()
    
                epoch_loss = running_loss/interactions_train.shape[0]
                print ('train_loss: ' + str(epoch_loss))
            
                print ('accuracy:' + str(accur/len(checks[0])))
            
                if epoch_loss < best_train_loss:
                    best_train_loss = epoch_loss
                    print ('checkpoint model with train loss: ' + str(best_train_loss))
                    checkpoint_model(epoch_loss, model, save_path_b)
        
                val_loss, val_accuracy = run_validation(model,interactions_test_orig, batch_size, save_path_b)
                print ('val loss: ' + str(val_loss))
                print ('val accuracy: ' + str(val_accuracy/len(checks_v[0])))
        
                if val_loss < best_val_loss:
                    best_val_loss = val_loss
                    print ('checkpointing_model with val loss: ' + str(best_val_loss))
                    checkpoint_model(val_loss, model, save_path_val_b)

FACTOR: 1
Decay: 0.1
LR: 1e-06
Epoch: 0
train_loss: 1.8425013302623783
accuracy:0.37598736176935227
checkpoint model with train loss: 1.8425013302623783
val loss: 1.832327844538434
val accuracy: 0.3978102189781022
checkpointing_model with val loss: 1.832327844538434
Epoch: 1
train_loss: 1.8560644311717014
accuracy:0.3807266982622433
val loss: 1.869869199053443
val accuracy: 0.3686131386861314
Epoch: 2
train_loss: 1.8525915187863107
accuracy:0.40442338072669826
val loss: 1.8437195432557028
val accuracy: 0.3795620437956204
Epoch: 3
train_loss: 1.8748358595242056
accuracy:0.3902053712480253
val loss: 1.8566475258958597
val accuracy: 0.4051094890510949
Epoch: 4
train_loss: 1.8479241791585013
accuracy:0.3933649289099526
val loss: 1.8424823363269014
val accuracy: 0.38686131386861317
LR: 1e-05
Epoch: 0
train_loss: 1.8248376447555783
accuracy:0.3696682464454976
checkpoint model with train loss: 1.8248376447555783
val loss: 1.8187134776477063
val accuracy: 0.4051094890510949
checkpointing_model

val loss: 1.8826623402820806
val accuracy: 0.3357664233576642
LR: 0.0001
Epoch: 0
train_loss: 1.6929843179340653
accuracy:0.353870458135861
checkpoint model with train loss: 1.6929843179340653
val loss: 1.676082195448481
val accuracy: 0.3175182481751825
Epoch: 1
train_loss: 1.7017309276944643
accuracy:0.3696682464454976
val loss: 1.6864375837676286
val accuracy: 0.4124087591240876
Epoch: 2
train_loss: 1.693500030819232
accuracy:0.382306477093207
val loss: 1.6817328662876752
val accuracy: 0.354014598540146
Epoch: 3
train_loss: 1.7010891489018813
accuracy:0.37598736176935227
val loss: 1.662127773922467
val accuracy: 0.38686131386861317
Epoch: 4
train_loss: 1.6964255843049854
accuracy:0.3412322274881517
val loss: 1.673049785144697
val accuracy: 0.3795620437956204
LR: 0.001
Epoch: 0
train_loss: 1.6976704219572447
accuracy:0.43601895734597157
checkpoint model with train loss: 1.6976704219572447
val loss: 1.7105845736468313
val accuracy: 0.4051094890510949
Epoch: 1
train_loss: 1.719778140810

val loss: 1.875139521779408
val accuracy: 0.44525547445255476
Epoch: 2
train_loss: 1.8854883258803075
accuracy:0.35545023696682465
val loss: 1.890599884882516
val accuracy: 0.36496350364963503
Epoch: 3
train_loss: 1.8700450661849481
accuracy:0.3933649289099526
val loss: 1.8851063104051808
val accuracy: 0.38686131386861317
Epoch: 4
train_loss: 1.886894662301969
accuracy:0.36808846761453395
val loss: 1.8987034101408495
val accuracy: 0.39416058394160586
LR: 1e-05
Epoch: 0
train_loss: 2.0121528486145226
accuracy:0.32701421800947866
checkpoint model with train loss: 2.0121528486145226
val loss: 1.9832069903699832
val accuracy: 0.31386861313868614
Epoch: 1
train_loss: 1.9914287048967827
accuracy:0.40126382306477093
checkpoint model with train loss: 1.9914287048967827
val loss: 1.9621661825765555
val accuracy: 0.33211678832116787
Epoch: 2
train_loss: 1.9882583932341262
accuracy:0.36492890995260663
checkpoint model with train loss: 1.9882583932341262
val loss: 1.9801024933847784
val accuracy: 

In [183]:
#original ratings
best_val_loss = 1000.0
best_train_loss = 1000.0
accur = 0
model = BiasedMatrixFactorization(interactions_train_orig.shape[0], interactions_train_orig.shape[1], mean, n_factors=2)
loss_func = torch.nn.MSELoss()
reg_loss_func = torch.optim.Adam(model.parameters(), lr=1e-6, weight_decay=0.001)
for i in range(5):
    print ("Epoch: " + str(i))
    running_loss = 0.0
    accur = 0
    #best_train_loss = 1000.0
    for m, batch in enumerate(get_batch(batch_size, interactions_train_orig)):
        reg_loss_func.zero_grad()
        interactions = Variable(torch.FloatTensor(interactions_train_orig[batch,:].toarray()))
        rows = Variable(torch.LongTensor(batch))
        cols = Variable(torch.LongTensor(np.arange(interactions_train_orig.shape[1])))
        predictions = model(rows, cols)
        
        loss = loss_func(predictions, interactions)
        running_loss += np.sqrt(loss.data[0])*batch_size
                
        correct = get_accuracy_test(interactions_train_orig, model, rows)
        accur = accur + correct
                
        loss.backward()
    reg_loss_func.step()
    
    epoch_loss = running_loss/interactions_train.shape[0]
    print ('train_loss: ' + str(epoch_loss))
            
    print ('accuracy:' + str(accur/len(checks[0])))
            
    if epoch_loss < best_train_loss:
        best_train_loss = epoch_loss
        print ('checkpoint model with train loss: ' + str(best_train_loss))
        checkpoint_model(epoch_loss, model, save_path_b)
        
    val_loss, val_accuracy = run_validation(model,interactions_test_orig, batch_size, save_path_b)
    print ('val loss: ' + str(val_loss))
    print ('val accuracy: ' + str(val_accuracy/len(checks_v[0])))
        
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        print ('checkpointing_model with val loss: ' + str(best_val_loss))
        checkpoint_model(val_loss, model, save_path_val_b)

Epoch: 0
train_loss: 2.017785400000019
accuracy:0.36808846761453395
checkpoint model with train loss: 2.017785400000019
val loss: 2.00600047184711
val accuracy: 0.354014598540146
checkpointing_model with val loss: 2.00600047184711
Epoch: 1
train_loss: 2.0221710662689203
accuracy:0.330173775671406
val loss: 1.9989182770343323
val accuracy: 0.3357664233576642
checkpointing_model with val loss: 1.9989182770343323
Epoch: 2
train_loss: 2.023214779544221
accuracy:0.35071090047393366
val loss: 1.990346161214133
val accuracy: 0.34306569343065696
checkpointing_model with val loss: 1.990346161214133
Epoch: 3
train_loss: 2.020910187721196
accuracy:0.33175355450236965
val loss: 1.9930204177485842
val accuracy: 0.3613138686131387
Epoch: 4
train_loss: 2.030213675217824
accuracy:0.34755134281200634
val loss: 1.9945363559476794
val accuracy: 0.3613138686131387


In [119]:
mean = np.mean(interactions_train)

In [120]:
best_val_loss = 1000.0
for f in range(2):
    print ("FACTOR: " + str(factors[f]))
    for r in range(len(decays)):
        print ("Decay: " + str(decays[r]))
        for l in range(len(LR)):
            print ('LR: '+ str(LR[l]))
            best_train_loss = 1000.0
            accur = 0
            model = BiasedMatrixFactorization(interactions_train.shape[0], interactions_train.shape[1], mean, n_factors=factors[f])
            loss_func = torch.nn.MSELoss()
            reg_loss_func = torch.optim.Adam(model.parameters(), lr=LR[l], weight_decay=decays[r])
            for i in range(5):
                print ("Epoch: " + str(i))
                running_loss = 0.0
                accur = 0
                for m, batch in enumerate(get_batch(batch_size, interactions_train)):
                    reg_loss_func.zero_grad()
                    interactions = Variable(torch.FloatTensor(interactions_train[batch,:].toarray()))
                    rows = Variable(torch.LongTensor(batch))
                    cols = Variable(torch.LongTensor(np.arange(interactions_train.shape[1])))
                    predictions = model(rows, cols)
        
                    loss = loss_func(predictions, interactions)
                    running_loss += np.sqrt(loss.data[0])*batch_size
                
                    correct = get_accuracy_test(interactions_train_orig, model, rows)
                    accur = accur + correct
                
                    loss.backward()
                reg_loss_func.step()
    
                epoch_loss = running_loss/interactions_train.shape[0]
                print ('train_loss: ' + str(epoch_loss))
            
                print ('accuracy:' + str(accur/len(checks[0])))
            
                if epoch_loss < best_train_loss:
                    best_train_loss = epoch_loss
                    print ('checkpoint model with train loss: ' + str(best_train_loss))
                    checkpoint_model(epoch_loss, model, save_path_b)
        
                val_loss, val_accuracy = run_validation_test(model, interactions_test, interactions_test_orig, batch_size, save_path_b)
                print ('val loss: ' + str(val_loss))
                print ('val accuracy: ' + str(val_accuracy/len(checks_v[0])))
        
                if val_loss < best_val_loss:
                    best_val_loss = val_loss
                    print ('checkpointing_model with val loss: ' + str(best_val_loss))
                    checkpoint_model(val_loss, model, save_path_val_b)

FACTOR: 1
Decay: 0.1
LR: 1e-06
Epoch: 0
train_loss: 1.852571810194077
accuracy:0.4075829383886256
checkpoint model with train loss: 1.852571810194077
val loss: 1.8263915199137095
val accuracy: 0.3722627737226277
checkpointing_model with val loss: 1.8263915199137095
Epoch: 1
train_loss: 1.8523846179149601
accuracy:0.41390205371248023
checkpoint model with train loss: 1.8523846179149601
val loss: 1.8274396629666434
val accuracy: 0.3613138686131387
Epoch: 2
train_loss: 1.8839186768976603
accuracy:0.4028436018957346
val loss: 1.8337168405761028
val accuracy: 0.3686131386861314
Epoch: 3
train_loss: 1.839028310228191
accuracy:0.4186413902053712
checkpoint model with train loss: 1.839028310228191
val loss: 1.8273346189874067
val accuracy: 0.3978102189781022
Epoch: 4
train_loss: 1.8602719143854003
accuracy:0.3949447077409163
val loss: 1.8065769778288931
val accuracy: 0.3759124087591241
checkpointing_model with val loss: 1.8065769778288931
LR: 1e-05
Epoch: 0
train_loss: 1.875910875715104
accura

train_loss: 1.8495485700066627
accuracy:0.320695102685624
checkpoint model with train loss: 1.8495485700066627
val loss: 1.8366752410211051
val accuracy: 0.35766423357664234
Epoch: 3
train_loss: 1.8724051281925334
accuracy:0.33649289099526064
val loss: 1.830485967522655
val accuracy: 0.34306569343065696
Epoch: 4
train_loss: 1.8624894587765557
accuracy:0.334913112164297
val loss: 1.8340929100447436
val accuracy: 0.35036496350364965
LR: 0.0001
Epoch: 0
train_loss: 1.9021403064352698
accuracy:0.39178515007898895
checkpoint model with train loss: 1.9021403064352698
val loss: 1.826663661657794
val accuracy: 0.38686131386861317
Epoch: 1
train_loss: 1.8903136402782226
accuracy:0.4186413902053712
checkpoint model with train loss: 1.8903136402782226
val loss: 1.8441453015896931
val accuracy: 0.4051094890510949
Epoch: 2
train_loss: 1.910731069383419
accuracy:0.4028436018957346
val loss: 1.856626718626262
val accuracy: 0.3978102189781022
Epoch: 3
train_loss: 1.9053683363604563
accuracy:0.40758293

val loss: 1.9879246719585626
val accuracy: 0.35766423357664234
Epoch: 1
train_loss: 2.0278031349262093
accuracy:0.37598736176935227
checkpoint model with train loss: 2.0278031349262093
val loss: 1.9878967835863532
val accuracy: 0.33941605839416056
Epoch: 2
train_loss: 2.029999898703346
accuracy:0.36334913112164297
val loss: 1.9903108941782042
val accuracy: 0.3357664233576642
Epoch: 3
train_loss: 2.0264012130168934
accuracy:0.34755134281200634
checkpoint model with train loss: 2.0264012130168934
val loss: 1.9796550172044642
val accuracy: 0.33211678832116787
Epoch: 4
train_loss: 2.035806974774789
accuracy:0.3570300157977883
val loss: 1.9949268467045365
val accuracy: 0.35766423357664234
LR: 1e-05
Epoch: 0
train_loss: 2.1045778345192394
accuracy:0.4186413902053712
checkpoint model with train loss: 2.1045778345192394
val loss: 2.0677313446134633
val accuracy: 0.4781021897810219
Epoch: 1
train_loss: 2.1011662172025454
accuracy:0.4423380726698262
checkpoint model with train loss: 2.1011662172

In [205]:
#food + service ratings
best_val_loss = 1000.0
best_train_loss = 1000.0
accur = 0
model = BiasedMatrixFactorization(interactions_train.shape[0], interactions_train.shape[1],mean, n_factors=2)
loss_func = torch.nn.MSELoss()
reg_loss_func = torch.optim.Adam(model.parameters(), lr=1e-4, weight_decay=0.01)
for i in range(5):
    print ("Epoch: " + str(i))
    running_loss = 0.0
    accur = 0
    for m, batch in enumerate(get_batch(batch_size, interactions_train)):
        reg_loss_func.zero_grad()
        interactions = Variable(torch.FloatTensor(interactions_train[batch,:].toarray()))
        rows = Variable(torch.LongTensor(batch))
        cols = Variable(torch.LongTensor(np.arange(interactions_train.shape[1])))
        predictions = model(rows, cols)
        
        loss = loss_func(predictions, interactions)
        running_loss += np.sqrt(loss.data[0])*batch_size
                
        correct = get_accuracy_test(interactions_train_orig, model, rows)
        accur = accur + correct
                
        loss.backward()
    reg_loss_func.step()
    
    epoch_loss = running_loss/interactions_train.shape[0]
    print ('train_loss: ' + str(epoch_loss))
            
    print ('accuracy:' + str(accur/len(checks[0])))
            
    if epoch_loss < best_train_loss:
        best_train_loss = epoch_loss
        print ('checkpoint model with train loss: ' + str(best_train_loss))
        checkpoint_model(epoch_loss, model, save_path_b)
        
    val_loss, val_accuracy = run_validation_test(model, interactions_test, interactions_test_orig, batch_size, save_path_b)
    print ('val loss: ' + str(val_loss))
    print ('val accuracy: ' + str(val_accuracy/len(checks_v[0])))
        
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        print ('checkpointing_model with val loss: ' + str(best_val_loss))
        checkpoint_model(val_loss, model, save_path_val_b)

Epoch: 0
train_loss: 2.0589895856567324
accuracy:0.3807266982622433
checkpoint model with train loss: 2.0589895856567324
val loss: 2.014102002171507
val accuracy: 0.3795620437956204
checkpointing_model with val loss: 2.014102002171507
Epoch: 1
train_loss: 2.0352470915237335
accuracy:0.41232227488151657
checkpoint model with train loss: 2.0352470915237335
val loss: 2.010590675922687
val accuracy: 0.38686131386861317
checkpointing_model with val loss: 2.010590675922687
Epoch: 2
train_loss: 2.0624499266315643
accuracy:0.4060031595576619
val loss: 2.0150688089678446
val accuracy: 0.3795620437956204
Epoch: 3
train_loss: 2.0780197218153953
accuracy:0.40916271721958924
val loss: 2.0135595040873877
val accuracy: 0.3905109489051095
Epoch: 4
train_loss: 2.0682187608625413
accuracy:0.37756714060031593
val loss: 2.019331210742972
val accuracy: 0.36496350364963503


In [206]:
recs = generate_predictions(model, interactions_full_orig, save_path_val_b)
results = give_top5_cuisines(recs, rest_deets, rest_cuisine, user_list, item_list)

In [207]:
df = pd.DataFrame(results,columns = col_names)

In [212]:
len(df['Rec5'].value_counts().index.tolist())

33

In [228]:
d = [14,17,25,28,33]

In [229]:
np.mean(d)

23.4