### Python Modules & configurations

In [26]:
import pickle # Serialization and deserialization
import pandas as pd # Package to manage dataframe like in R
import numpy as np # Scientific computing package
import random

import matplotlib.pyplot as plt # Basic package for statistical data visualization
import seaborn as sns # Advanced package for statistical data visualization

# In order to display figures inside the notebook:
%matplotlib inline

# Show every column when a wide dataframe is rendered
# (max_columns=None removes pandas' column limit).
from IPython.display import display
pd.set_option('display.max_columns', None)

In [27]:
# Personal folder: new data and notebooks are written here.
MY_FOLDER = './output'

# Shared folder from which previously saved data is read.
SAVED_FOLDER = './data'

### Data Import

In [28]:
# NOTE: every read_pickle call below unpickles a file from disk. Unpickling can
# execute arbitrary code, so only point these paths at files you produced or trust.

# Recommendation ranks (space-delimited text, first row used as header).
# Built from MY_FOLDER like the other inputs so that changing the folder in the
# configuration cell moves every read at once.
LS_user_output = pd.read_csv('{}/LS_user_output.txt'.format(MY_FOLDER), delimiter = ' ')
# Friendship information (space-delimited, no header row, so columns are named by hand)
user_user = pd.read_csv('{}/sample_user.txt'.format(MY_FOLDER), delimiter = ' ', header=None)
user_user.columns = ['user1','user2','friends']
# Distance information
testset_df = pd.read_pickle('{}/testset_df.pickle'.format(MY_FOLDER))
# User id list
user_id_list = pd.read_pickle('{}/user_id_list.pickle'.format(MY_FOLDER))
# User dataframe
users_df = pd.read_pickle('{}/users_df.pickle'.format(MY_FOLDER))
# Selected reviews
selected_review_df = pd.read_pickle('{}/selected_review_df.pickle'.format(MY_FOLDER))
# Tips
tip_df = pd.read_pickle('{}/tip.pickle'.format(SAVED_FOLDER))
# Business details; work on a copy so the loaded frame stays untouched
business_df = pd.read_pickle('{}/business.pickle'.format(SAVED_FOLDER))
business_user_df = business_df.copy()
business_user_df.head(2)

Unnamed: 0,attributes,business_id,categories,city,full_address,hours,id,latitude,longitude,name,neighborhoods,open,review_count,stars,state,type
0,{},w_vBsXaz-XwyN5O_uYRh8Q,"[Food, Desserts]",Las Vegas,"Las Vegas, NV",{},00097e13-63f6-4024-85e6-84382d5fef11,36.175,-115.136389,Muffcakes Bakery,[],True,4,4.5,NV,business
1,"{'Good For': {'dinner': False, 'dessert': Fals...",OCmmPs_5NBt65ZY7OuG-lA,"[Sandwiches, Restaurants]",Phoenix,"3300 N Central Ave Lbby\nPhoenix, AZ 85012",{},0002ff96-b75e-4bd1-a293-2806787991b6,33.648317,-112.119373,Sam's Deli Mart,[],True,3,4.0,AZ,business


### Recommendation

#### Get user info

In [29]:
# Distinct user and item indices present in the recommendation output
user_list = list(set(LS_user_output['userId'].tolist()))
item_list = list(set(LS_user_output['itemId'].tolist()))
# Drop the 'original' column. errors='ignore' keeps this cell re-runnable:
# without it a second execution raises KeyError because the column is already gone.
LS_user_output = LS_user_output.drop(columns = ['original'], errors = 'ignore')

In [77]:
# Build the top-K recommendation dataframe: for every user in user_list keep the
# K rows with the highest 'prediction', best prediction first.
k = 100
# Collect the per-user slices in a list and concatenate once. DataFrame.append
# was removed in pandas 2.0 and re-copied the accumulated frame on every call.
topK_frames = [
    LS_user_output[LS_user_output['userId'] == user]
    .sort_values(['prediction'], ascending = False)
    .iloc[:k, :]
    for user in user_list
]
# pd.concat raises on an empty list, so fall back to an empty frame that still
# carries the expected columns.
topK_df = pd.concat(topK_frames) if topK_frames else LS_user_output.iloc[:0]
topK_df.head()

Unnamed: 0,userId,itemId,prediction
16,3,23585,5.0
41,3,55229,4.64
40,3,53184,4.621
11,3,10788,4.587
3,3,3427,4.571


In [87]:
# Pick one user at random as a worked example.
# number_of_user is a position inside user_list, not a user index.
number_of_user = random.randint(0, len(user_list) - 1)
# Value from the 'userId' column for the chosen user
target_user_id = user_list[number_of_user]
# NOTE(review): looked up by position in user_list rather than by target_user_id;
# confirm that user_id_list is ordered like user_list.
target_user_code = user_id_list[number_of_user]
print(
    "Choosed user", number_of_user,
    ", user's index is", target_user_id,
    ", user's id is", target_user_code,
)

Choosed user 71 user's index is 160 user's id is btqgtZAxkp90Y7Wo63NAPw


#### Extract necessary information

In [88]:
# Item numbers of the target user's top-k recommendations, in topK_df order
topK_item_list = topK_df[topK_df['userId'] == target_user_id]['itemId'].tolist()
print('User\'s top',k ,'items numbers :',topK_item_list)
# Distance rows of the test set that belong to the target user
testset_df_user = testset_df[testset_df['user'] == target_user_code]
# Indices of the target user's friends
target_user_friends_list = user_user[user_user['user1'] == target_user_id]['user2'].tolist()
print('Target user\'s friends list',target_user_friends_list)
# Business id of each recommended item, resolved once and shared by both steps
# below. Item numbers are used as row labels (.loc) everywhere, matching the
# final display cell; the original mixed .loc and .iloc for the same lookup.
topK_business_ids = [
    business_user_df.loc[item_index]['business_id']
    for item_index in topK_item_list[:k]
]
# Tips: one [rank, list of tip texts] entry per recommended item
tips = [
    [rank, tip_df[tip_df['business_id'] == business_id]['text'].tolist()]
    for rank, business_id in enumerate(topK_business_ids)
]
# Relationship between friends and items: which friends reviewed which item
target_user_friends_code = users_df.iloc[target_user_friends_list]['user_id'].tolist()
item_rec_by_friend = []
for rank, business_id in enumerate(topK_business_ids):
    item_reviews = selected_review_df[selected_review_df['business_id'] == business_id]
    friend_reviewers = item_reviews[item_reviews['user_id'].isin(target_user_friends_code)]['user_id'].tolist()
    # Only keep items that at least one friend has reviewed
    if len(friend_reviewers) != 0:
        item_rec_by_friend.append([rank, friend_reviewers])
print('These items recommended by friends :', item_rec_by_friend)

User's top 100 items numbers : [982, 48841, 58157, 57592, 56965, 50570, 50177, 50111, 50087, 46890, 25018, 46147, 44869, 43744, 1742, 29888, 28895, 28409, 58197, 58435, 59182, 59812, 77259, 76085, 75374, 74464, 73380, 71950, 70574, 70478, 70366, 67309, 64346, 62945, 62675, 61195, 60671, 27457, 77348, 24832, 7411, 9530, 4917, 9710, 10927, 12372, 20129, 3738, 6922, 22218, 23838, 19941, 2871, 19867, 74036, 75471, 3124, 28643, 54453, 27228, 21797, 15679, 44171, 66016, 47017, 54957, 18638, 23889, 41684, 18802, 56959, 30696, 65861, 53845, 37822, 74358, 37653, 49146, 13776, 11156, 28178, 72414, 27549, 30093, 31618, 39139, 66516, 34182, 16775, 14023, 59050, 31041, 33823, 66222, 18503, 40055, 33241, 5941, 52992, 2522]
Target user's friends list [46041, 146406, 194935, 79532, 44105, 133950, 195395, 97627, 35533, 107218, 100494, 188012, 76675, 241204, 211969, 7636, 227583, 24034, 150068, 90808, 9689, 238860, 76978, 205630, 185659, 71407, 181335, 204676, 55375, 245049, 242343, 208676, 133059, 2182

#### Final recommendation

In [96]:
# Print a report for each recommended item: business details, distance to the
# user, other users' tips and the friends who reviewed it.
print('*** Top',k, 'recommandation for user', target_user_id, '***\n')
# Iterate over the items that actually exist: a user may have fewer than k
# recommendations, in which case range(k) would raise IndexError.
for i, item_index in enumerate(topK_item_list[:k]):
    print('******************************************************************')
    print('**************************Restaurant',i + 1,'***************************')
    print(business_user_df.loc[item_index][['name','categories','city','full_address','hours','neighborhoods','open','stars','state','type']])
    # The test set may hold no distance row for this item; .iloc[0] on an empty
    # selection would raise IndexError, so check first.
    item_distances = testset_df_user[testset_df_user['index'] == item_index]['distance']
    if len(item_distances) != 0:
        print('Distance to user is', round(item_distances.iloc[0],2), 'km')
    else:
        print('Distance to user is not available')
    # tips[i] is [rank, list of tip texts]
    if len(tips[i][1]) != 0:
        print('Other users\' comments :')
        for tip in tips[i][1]:
            print(tip)
    # item_rec_by_friend holds [rank, list of friend ids] entries
    for item in item_rec_by_friend:
        if item[0] == i:
            print('Recommanded by friends', item[1])

*** Top 100 recommandation for user 160 ***

******************************************************************
**************************Restaurant 1 ***************************
name                                           La Tradicion
categories                           [Mexican, Restaurants]
city                                              Las Vegas
full_address     4371 Stewart\nSunrise\nLas Vegas, NV 89110
hours                                                    {}
neighborhoods                                     [Sunrise]
open                                                   True
stars                                                   4.5
state                                                    NV
type                                               business
Name: 982, dtype: object
Distance to user is 9.59 km
******************************************************************
**************************Restaurant 2 ***************************
name                              