In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json
from sklearn.externals import joblib
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the review table from a local joblib dump (presumably a merged
# business/review DataFrame -- confirm against the notebook that wrote it).
# NOTE(security): joblib.load unpickles arbitrary objects; only load trusted files.
business_review = joblib.load('business_review')

# Keep only businesses with more than MIN_REVIEW_COUNT reviews.
MIN_REVIEW_COUNT = 20
reduced_br = business_review[business_review.review_count > MIN_REVIEW_COUNT]

# Keep only users with more than MIN_USER_REVIEWS reviews. The per-user counts
# are taken over the full table, not only over the businesses kept above.
MIN_USER_REVIEWS = 10
users = business_review.groupby('user_id').business_id.count()
user_ids = users[users > MIN_USER_REVIEWS].index

# Vectorised membership test instead of a per-row Python lambda. The .copy()
# detaches the result from business_review so later column assignments act on
# an independent frame rather than on a slice.
reduced_br = reduced_br[reduced_br.user_id.isin(user_ids)].copy()

reduced_br.shape

(593997, 73)

In [3]:
from sklearn.preprocessing import LabelEncoder

# Replace the raw business and user ids with integer codes.
# Each column gets its own encoder: refitting a single encoder on user_id would
# overwrite the business_id mapping, making it impossible to map business codes
# back to the original ids with inverse_transform.
business_encoder = LabelEncoder()
user_encoder = LabelEncoder()

# .assign builds a new frame instead of setting columns on a filtered slice,
# which avoids pandas' chained-assignment pitfall.
reduced_br = reduced_br.assign(
    business_id=business_encoder.fit_transform(reduced_br.business_id),
    user_id=user_encoder.fit_transform(reduced_br.user_id),
)

# Backward-compatible alias: previously `labelencoder` ended up fitted on
# user_id, so keep that name pointing at the user encoder.
labelencoder = user_encoder

In [4]:
from sklearn.model_selection import train_test_split

# Only the columns the recommender needs: item id, rating, user id.
r_cols = ['business_id', 'stars_x', 'user_id']
df = reduced_br[r_cols]

# Fixed seed so the split (and everything trained on it) is reproducible.
RANDOM_STATE = 42

# train_test_split returns the splits grouped per input array:
#   (X_train, X_test, y_train, y_test)
# Unpacking them as (X_train, y_train, X_test, y_test) would leave X_test
# holding the training labels instead of the held-out feature rows.
X_train, X_test, y_train, y_test = train_test_split(
    df, df['stars_x'], random_state=RANDOM_STATE)

In [5]:
import turicreate

# Convert both splits into turicreate SFrames (train first, then test).
train_data, test_data = (
    turicreate.SFrame(split) for split in (X_train, X_test)
)

# Fit a popularity recommender on the training split, using the encoded
# user/business ids as keys and the star rating as the target.
pop_model = turicreate.popularity_recommender.create(
    train_data, user_id='user_id', item_id='business_id', target='stars_x')

In [6]:
# Request the five top-ranked businesses from the popularity model, then
# preview the first 25 rows of the returned recommendations.
pop_model_rec = pop_model.recommend(k=5)
pop_model_rec.print_rows(25)

+---------+-------------+-------+------+
| user_id | business_id | score | rank |
+---------+-------------+-------+------+
|  16598  |    15146    |  5.0  |  1   |
|  16598  |     1560    |  5.0  |  2   |
|  16598  |    10992    |  5.0  |  3   |
|  16598  |     1772    |  5.0  |  4   |
|  16598  |     6672    |  5.0  |  5   |
|  11047  |    15146    |  5.0  |  1   |
|  11047  |     1560    |  5.0  |  2   |
|  11047  |    10992    |  5.0  |  3   |
|  11047  |     1772    |  5.0  |  4   |
|  11047  |     6672    |  5.0  |  5   |
|   9482  |    15146    |  5.0  |  1   |
|   9482  |     1560    |  5.0  |  2   |
|   9482  |    10992    |  5.0  |  3   |
|   9482  |     1772    |  5.0  |  4   |
|   9482  |     6672    |  5.0  |  5   |
|   7893  |    15146    |  5.0  |  1   |
|   7893  |     1560    |  5.0  |  2   |
|   7893  |    10992    |  5.0  |  3   |
|   7893  |     1772    |  5.0  |  4   |
|   7893  |     6672    |  5.0  |  5   |
|   5013  |    15146    |  5.0  |  1   |
|   5013  |     

In [10]:
# Build the user x business rating matrix (rows: user_id, columns: business_id,
# values: stars_x), with 0 standing in for "no rating".
# drop_duplicates() only removes fully identical rows; a user who rated the same
# business more than once with different stars would make .pivot() raise
# "Index contains duplicate entries". pivot_table averages such repeats instead
# and yields the same matrix when every (user, business) pair is unique.
(
    df.drop_duplicates()
    .pivot_table(
        index='user_id',
        columns='business_id',
        values='stars_x',
        aggfunc='mean')
    .fillna(0)
)

business_id,0,1,2,3,4,5,6,7,8,9,...,29100,29101,29102,29103,29104,29105,29106,29107,29108,29109
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
