In [1]:
import numpy as np 
import pandas as pd
import os
import sys

In [80]:
from surprise import BaselineOnly
from surprise import Dataset
from surprise import dataset
from surprise import Reader
from surprise.model_selection import cross_validate
from surprise import SVD
from surprise import accuracy
from surprise.model_selection import KFold
from surprise import KNNBasic
from surprise.model_selection import cross_validate, train_test_split
from time import time
import flask
from flask import Flask,jsonify

In [54]:
reviews_filename = 'final_english_reviews.csv'
users_filename = 'Users_ON.csv'
restaurants_filename = 'Restuarants_ON.csv'

def prepareData(reviews_filename,users_filename,restaurants_filename,nrows):
    reviewData = pd.read_csv(reviews_filename).sample(nrows,random_state = 42)
    userData = pd.read_csv(users_filename)
    restaurantData = pd.read_csv(restaurants_filename)
    restaurantData = restaurantData.loc[restaurantData['business_id'].isin(reviewData['business_id'])]
    userData = userData.loc[userData['user_id'].isin(reviewData['user_id'])]
   
    return reviewData,userData,restaurantData

In [55]:
reviewData,userData,restaurantData = prepareData(reviews_filename,users_filename,restaurants_filename,1000)

In [56]:
reader = Reader(rating_scale=(1, 5))
input_df = pd.read_csv("final_english_reviews.csv",nrows=100)
ldata = Dataset.load_from_df(reviewData[['user_id', 'business_id', 'stars']],reader)

algo = SVD()
ldata.df.head()

Unnamed: 0,user_id,business_id,stars
165612,4D6LLuJfao_eHGA6XZR-bA,CB8HrynUWR4Odnj-XTY-Ew,5
129588,A-IkCqnYosZa49XD9qiSww,djqiu4L7P2sF1JY0DP7UHQ,2
91326,2e5V6M4GNufEnbGJpVdCjw,WGmeYEd6Kd3rHWVMO2IM6A,3
25414,eKD0oCdjDxwn0IN6552oyQ,-av1lZI1JDY_RZN2eTMnWg,4
9570,BRyMkJp6ge2dvoxWUUJYcw,cpR25rH3tdth2oiM9a_07g,4


In [57]:
print(algo)
sys.getsizeof(algo)

<surprise.prediction_algorithms.matrix_factorization.SVD object at 0x116ef9a90>


56

In [58]:
ldata.df['stars'].value_counts()

4    325
5    263
3    204
2    105
1    103
Name: stars, dtype: int64

In [59]:
start = time()
a = cross_validate(algo, ldata, measures=['RMSE', 'MAE'], cv=5, verbose=True)
end = time()
print((end - start)/60)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.2912  1.2600  1.2800  1.2887  1.1912  1.2622  0.0372  
MAE (testset)     1.0915  1.0625  1.0798  1.0887  0.9871  1.0619  0.0387  
Fit time          0.07    0.07    0.06    0.07    0.06    0.07    0.00    
Test time         0.00    0.00    0.00    0.00    0.00    0.00    0.00    
0.006003661950429281


In [60]:
print(a)

{'test_rmse': array([1.29124871, 1.25999339, 1.27999654, 1.28872789, 1.19117772]), 'test_mae': array([1.09148253, 1.06247008, 1.07976702, 1.08870285, 0.9871095 ]), 'fit_time': (0.07261228561401367, 0.06597900390625, 0.06456804275512695, 0.06680488586425781, 0.06454205513000488), 'test_time': (0.002239227294921875, 0.0019519329071044922, 0.0019378662109375, 0.0022389888763427734, 0.0020532608032226562)}


In [61]:
start = time()
data = ldata.build_full_trainset()
prd = data.build_anti_testset()
end = time()
print((end - start)/60)

0.014036683241526286


In [62]:
len(prd)

844031

In [63]:
sys.getsizeof(prd)

7310616

In [64]:
start = time()
result = []
for row in prd:
    result.append(algo.predict(row[0], row[1]))
end = time()
print((end - start)/60)

0.20261429945627848


In [65]:
from collections import defaultdict
 
def get_top5_recommendations(predictions, topN = 5):
     
    top_recs = defaultdict(list)
    for uid, iid, r_ui, est, details in predictions:
        top_recs[uid].append((iid, est))
     
    for uid, user_ratings in top_recs.items():
        user_ratings.sort(key = lambda x: x[1], reverse = True)
        top_recs[uid] = user_ratings[:topN]
     
    return top_recs

In [104]:
top5_recommendations = get_top5_recommendations(result)
top5_recommendations

defaultdict(list,
            {'--Qh8yKWAvIP4V4K8ZPfHA': [('v1uIObWcfiQiyr4EmtAixw',
               3.797681320292055),
              ('XlZ2Row0pszDilc5WmXiZQ', 3.725159194500818),
              ('-7BCZH437U5FjmNJ26llkg', 3.716732307959565),
              ('4twpbw7n4DmsLxAm6-sMkg', 3.702610582548498),
              ('lcozzaLLByOKX0Orcg-tIQ', 3.6821508358146704)],
             '-4oa1MpTkt3WJkL5_ZYwNw': [('l-Zjik0MbpabQPv-nNp9YQ',
               3.989449634184929),
              ('w1clLiXOo-yLhNjt5jDBCA', 3.9708015046849012),
              ('MC4becrrM9ntmnfMrD9b0g', 3.9622683511808727),
              ('VA95qAV2PRH9gtTi2Vfv4Q', 3.914536808878586),
              ('R1fkx0YcnRxqT0XpoxW1tA', 3.9060471054226196)],
             '-5vhyFvJdwNYNuQa1UiRlw': [('41o1FUbCYKJv2djtnlkzlg',
               3.795233109951189),
              ('tTzQZTrwBgwxMch77nCJfQ', 3.786258583103063),
              ('-7BCZH437U5FjmNJ26llkg', 3.780121077955747),
              ('w1clLiXOo-yLhNjt5jDBCA', 3.7721594972357018)

In [67]:
def get_top5Bid_for_uid(user_id):
    for uid, user_ratings in top5_recommendations.items():
        if(uid == user_id):
            val =  uid, ([iid for (iid, _) in user_ratings])
            break
    
    return null;

In [78]:
# top5_recommendations

In [105]:
id_to_bid = get_top5Bid_for_uid("--Qh8yKWAvIP4V4K8ZPfHA")
id_to_bid

('--Qh8yKWAvIP4V4K8ZPfHA',
 ['v1uIObWcfiQiyr4EmtAixw',
  'XlZ2Row0pszDilc5WmXiZQ',
  '-7BCZH437U5FjmNJ26llkg',
  '4twpbw7n4DmsLxAm6-sMkg',
  'lcozzaLLByOKX0Orcg-tIQ'])

In [106]:
business_list = id_to_bid[1]

In [107]:
# df_restaurant = restaurantData.loc[restaurantData['business_id'].isin(business_list)]
ranking_dict = {}
for i in range(1,len(business_list)+1):
    ranking_dict[business_list[i-1]] = i

In [110]:
ranking_dict.get('-7BCZH437U5FjmNJ26llkg')

3

In [None]:
import json

data = {}
data['user_id'] = id_to_bid[0]
data['business_list'] = restaurant_details
json_data = json.dumps(data)

In [None]:
import os, io
 
def read_item_names():
    """Read the u.item file from MovieLens 100-k dataset and returns a
    mapping to convert raw ids into movie names.
    """
    bid_to_name = {}
    businessname = pd.read_csv("Restuarants_ON.csv")
    for index, row in businessname.iterrows():
        bid_to_name[row['business_id']] = row['name']

    return bid_to_name

In [None]:
bid_to_name = read_item_names()

In [None]:
def get_top5Bnames_for_uid(user_id):
    for uid, user_ratings in top5_recommendations.items():
        if(uid == user_id):
            return uid, ([bid_to_name[iid] for (iid, _) in user_ratings])
    return null;

In [None]:
id_to_bname = get_top5Bnames_for_uid("qF3nPpord771rPspEpir4Q")
id_to_bname

In [None]:
def get_top5Bid_for_uid(user_id):
    for uid, user_ratings in top5_recommendations.items():
        if(uid == user_id):
            return uid, ([iid for (iid, _) in user_ratings])
    return null;

In [None]:
id_to_bid = get_top5Bid_for_uid("qF3nPpord771rPspEpir4Q")
id_to_bid

In [None]:
for i in id_to_bname[1]:
    print(i)