In [25]:
# Reference: https://medium.com/radon-dev/item-item-collaborative-filtering-with-binary-or-unary-data-e8f0b465b2c3

import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from scipy import sparse

# Load the TripAdvisor restaurant sample into a DataFrame. The path is
# relative, so the CSV must sit in the notebook's working directory.
data = pd.read_csv('tripadvisor_restaurant_sample.csv')

In [26]:
# Keep only the descriptive restaurant columns used by the rest of the notebook.
restaurants = data[[
    'Restaurant ID',
    'Name',
    'City',
    'State',
    'Country',
    'Cuisine',
]]
restaurants.head()

# Narrow the listing to rows whose City is exactly 'Boston'. Only this final
# expression is rendered as the cell's output.
boston_restaurants = restaurants.loc[restaurants['City'].eq('Boston')]
boston_restaurants.head()

Unnamed: 0,Restaurant ID,Name,City,State,Country,Cuisine
1415,1047386,Slice Place,Boston,MA,United States,Pizza & Pasta
1416,4161169,Hien Vuong Restaurant,Boston,MA,United States,
1420,1830226,Finagle-A-Bagel,Boston,MA,United States,"American, Bagels"
1422,5043831,Rebecca's Cafe,Boston,MA,United States,
1423,4940207,La Frontera,Boston,MA,United States,


In [27]:
# Scratch check: ten draws from {0, 1} (the upper bound 2 is exclusive).
np.random.randint(0, 2, size=10)

array([0, 0, 1, 1, 0, 1, 1, 1, 0, 1])

In [28]:
# (rows, columns) of the Boston subset.
boston_restaurants.shape

(528, 6)

In [58]:
# Size of the synthetic data set and the seed that makes it repeatable across
# kernel restarts (the original draw was unseeded, so every run differed).
N_FAKE_USERS = 100
FAKE_DATA_SEED = 42

# De-duplicate by name so chains like Dunkin Donuts collapse into one column.
unique_restaurant_names = boston_restaurants['Name'].unique()
unique_restaurants_count = len(unique_restaurant_names)

# Generate fake user data: one row per user, one 0/1 "visited" flag per
# restaurant, drawn in a single vectorised call. Kept as a list of row arrays
# so `fake_user_data` has the same shape of object as before.
rng = np.random.default_rng(FAKE_DATA_SEED)
fake_user_data = list(
    rng.integers(0, 2, size=(N_FAKE_USERS, unique_restaurants_count))
)

user_df = pd.DataFrame(fake_user_data, columns=unique_restaurant_names)

# User ids run 1..N_FAKE_USERS and are aligned to the default 0-based row index.
user_ids = pd.Series(np.arange(1, N_FAKE_USERS + 1))
user_df['user_id'] = user_ids
user_df.head()

Unnamed: 0,Slice Place,Hien Vuong Restaurant,Finagle-A-Bagel,Rebecca's Cafe,La Frontera,Babushka Deli Incorporated,South Street Pizza,Amazing Geisha Wings,Ankara Cafe 2 - CLOSED,Max Brenner - Boston,...,Nud POB Thai Restaurant,Boat House Deli,Haymarket Pizza,Newcombs,Tresca,Cranberry Cafe,Tacos El Charro Mexican Restaurant,Canary Square,Dot 2 Dot Cafe,user_id
0,0,1,1,1,0,1,0,1,1,0,...,0,0,0,1,0,1,1,1,0,1
1,1,1,0,0,1,0,0,0,1,1,...,1,0,1,0,0,1,0,1,1,2
2,1,0,1,0,0,0,0,1,1,0,...,1,0,0,0,1,1,0,1,1,3
3,1,0,0,1,1,0,1,1,1,1,...,0,1,1,1,0,0,0,1,1,4
4,0,0,0,1,1,1,1,1,1,1,...,1,0,1,0,0,1,0,0,0,5


In [59]:
# User x restaurant matrix without the id column. `columns=` is used because
# passing the axis positionally (`drop('user_id', 1)`) was deprecated in
# pandas 1.3 and raises TypeError from pandas 2.0 onwards.
data_items = user_df.drop(columns='user_id')
data_items.head()

Unnamed: 0,Slice Place,Hien Vuong Restaurant,Finagle-A-Bagel,Rebecca's Cafe,La Frontera,Babushka Deli Incorporated,South Street Pizza,Amazing Geisha Wings,Ankara Cafe 2 - CLOSED,Max Brenner - Boston,...,Deadwood Cafe and Brewery,Nud POB Thai Restaurant,Boat House Deli,Haymarket Pizza,Newcombs,Tresca,Cranberry Cafe,Tacos El Charro Mexican Restaurant,Canary Square,Dot 2 Dot Cafe
0,0,1,1,1,0,1,0,1,1,0,...,0,0,0,0,1,0,1,1,1,0
1,1,1,0,0,1,0,0,0,1,1,...,1,1,0,1,0,0,1,0,1,1
2,1,0,1,0,0,0,0,1,1,0,...,0,1,0,0,0,1,1,0,1,1
3,1,0,0,1,1,0,1,1,1,1,...,0,0,1,1,1,0,0,0,1,1
4,0,0,0,1,1,1,1,1,1,1,...,1,1,0,1,0,0,1,0,0,0


In [60]:
#------------------------
# ITEM-ITEM CALCULATIONS
#------------------------

# Per-user (row) Euclidean norm:
# magnitude = sqrt(x2 + y2 + z2 + ...)
magnitude = np.sqrt(np.square(data_items).sum(axis=1))

# A user with no visits has magnitude 0, and 0 / 0 would turn the whole row
# into NaN (which the similarity step cannot accept). Dividing by 1 instead
# leaves such a row as all zeros.
magnitude = magnitude.where(magnitude > 0, 1.0)

# Scale every row to unit length:
# unitvector = (x / magnitude, y / magnitude, z / magnitude, ...)
data_items = data_items.divide(magnitude, axis='index')

def calculate_similarity(data_items):
    """Return the pairwise cosine similarity between the columns of a frame.

    The frame is converted to a CSR sparse matrix and transposed so that each
    original column becomes a row, then handed to ``cosine_similarity``.

    Parameters
    ----------
    data_items : pandas.DataFrame
        Numeric frame whose columns are the items to compare.

    Returns
    -------
    pandas.DataFrame
        Square frame indexed and labelled on both axes by
        ``data_items.columns``.
    """
    labels = data_items.columns
    items_as_rows = sparse.csr_matrix(data_items).transpose()
    return pd.DataFrame(cosine_similarity(items_as_rows), index=labels, columns=labels)

In [61]:
# Build the item-item similarity matrix (restaurant x restaurant).
data_matrix = calculate_similarity(data_items)

# Show a bounded preview rather than echoing the full square frame.
data_matrix.head(10)

Unnamed: 0,Slice Place,Hien Vuong Restaurant,Finagle-A-Bagel,Rebecca's Cafe,La Frontera,Babushka Deli Incorporated,South Street Pizza,Amazing Geisha Wings,Ankara Cafe 2 - CLOSED,Max Brenner - Boston,...,Deadwood Cafe and Brewery,Nud POB Thai Restaurant,Boat House Deli,Haymarket Pizza,Newcombs,Tresca,Cranberry Cafe,Tacos El Charro Mexican Restaurant,Canary Square,Dot 2 Dot Cafe
Slice Place,1.000000,0.534755,0.407488,0.548650,0.271837,0.414737,0.541861,0.571780,0.521899,0.632912,...,0.497853,0.430769,0.593959,0.554405,0.448618,0.512356,0.457233,0.421503,0.611564,0.606241
Hien Vuong Restaurant,0.534755,1.000000,0.540655,0.560245,0.407537,0.406188,0.550638,0.507355,0.588779,0.535810,...,0.441310,0.512056,0.474976,0.529287,0.496793,0.539340,0.526563,0.490254,0.568586,0.525301
Finagle-A-Bagel,0.407488,0.540655,1.000000,0.408026,0.501636,0.457770,0.505783,0.480197,0.560904,0.455215,...,0.407492,0.524339,0.420400,0.537423,0.609728,0.530029,0.594975,0.456847,0.484553,0.324999
Rebecca's Cafe,0.548650,0.560245,0.408026,1.000000,0.416410,0.481673,0.557169,0.593508,0.562973,0.665572,...,0.437272,0.558018,0.405317,0.574086,0.507898,0.530586,0.439906,0.505874,0.596452,0.460525
La Frontera,0.271837,0.407537,0.501636,0.416410,1.000000,0.537705,0.476115,0.415777,0.474138,0.547934,...,0.431588,0.539372,0.353413,0.403070,0.409607,0.414514,0.466045,0.442144,0.410190,0.235037
Babushka Deli Incorporated,0.414737,0.406188,0.457770,0.481673,0.537705,1.000000,0.457530,0.538694,0.458531,0.543127,...,0.379414,0.543185,0.468529,0.422254,0.454274,0.328548,0.514005,0.467287,0.454228,0.326616
South Street Pizza,0.541861,0.550638,0.505783,0.557169,0.476115,0.457530,1.000000,0.546447,0.513051,0.587859,...,0.516984,0.600172,0.442550,0.554213,0.620064,0.600885,0.472647,0.354015,0.617609,0.541448
Amazing Geisha Wings,0.571780,0.507355,0.480197,0.593508,0.415777,0.538694,0.546447,1.000000,0.643935,0.691699,...,0.508860,0.513895,0.495724,0.527428,0.480278,0.499794,0.511304,0.479628,0.564010,0.535502
Ankara Cafe 2\n- CLOSED,0.521899,0.588779,0.560904,0.562973,0.474138,0.458531,0.513051,0.643935,1.000000,0.503876,...,0.470796,0.637130,0.458180,0.564955,0.503112,0.503530,0.544115,0.498936,0.475963,0.475229
Max Brenner - Boston,0.632912,0.535810,0.455215,0.665572,0.547934,0.543127,0.587859,0.691699,0.503876,1.000000,...,0.526764,0.587136,0.436721,0.590278,0.505758,0.604778,0.480992,0.510742,0.515470,0.584689


In [66]:
# Ask for 11 entries: the restaurant itself always ranks first with
# similarity 1.0, which leaves its 10 nearest neighbours after it.
data_matrix.loc['Super 88 Market Food Court', :].nlargest(n=11)

Super 88 Market Food Court        1.000000
Ashburton Cafe                    0.676163
Ro-Lin's Breakfast and Lunch      0.670952
Cafe Brazil                       0.654836
Hau Giang Restaurant              0.646549
Upper Crust                       0.636060
Great Wok Restaurants             0.635334
California Pizza Kitchen          0.633151
Torch\n- CLOSED                   0.630748
Dominic's Restaurant\n- CLOSED    0.629318
Espresso Love - Boston            0.628396
Name: Super 88 Market Food Court, dtype: float64

In [65]:
#------------------------
# USER-ITEM CALCULATIONS
#------------------------

user = 85 # The id of the user for whom we want to generate recommendations

# Find the row label(s) carrying this user id and fail with a clear message
# when there is none, instead of an opaque IndexError from `[0]`.
matching_rows = user_df.index[user_df['user_id'] == user].tolist()
if not matching_rows:
    raise ValueError('user_id {} not found in user_df'.format(user))
user_index = matching_rows[0] # Get the frame index

# Users visits for all restaurants as a vector. `user_index` is an index
# *label*, so it is looked up with .loc; .iloc would only agree with it while
# the frame keeps its default 0..n-1 index.
user_rating_vector = data_items.loc[user_index]

# Get the restaurants the user has visited (non-zero entries of that row)
known_user_visits = user_rating_vector[user_rating_vector > 0].index.values

# Calculate the score: for each restaurant, the similarity-weighted sum of the
# user's visits divided by that restaurant's total similarity mass.
score = data_matrix.dot(user_rating_vector).div(data_matrix.sum(axis=1))

# Remove the known likes from the recommendation
score = score.drop(known_user_visits)

# Print the known likes and the top 20 recommendations
print(known_user_visits)
print(score.nlargest(20))

['Slice Place' 'Finagle-A-Bagel' "Rebecca's Cafe" 'La Frontera'
 'Babushka Deli Incorporated' 'Ankara Cafe 2\n- CLOSED'
 'Max Brenner - Boston' 'Trident Booksellers & Cafe' 'Wrapme'
 'Ghalal Restaurant' 'Great Wok Restaurants'
 'University House of Pizza\n- CLOSED' 'Boston Kabob Company'
 'Momogoose Foodtruck' "Harry's Bar and Grille" 'Green Bean Mobile Cafe'
 'Camino Real Colombian Restaurant' "New Speedy Chen's" 'Street Thai'
 "Toni Ann's" "McDonald's" 'Mulligans' 'Cote' "Canestaro's"
 'Capitol Coffee House' 'Breakfast restaurants' "Dunkin' Donuts"
 'Temptations Cafe' 'Chung Wah Restaurant' 'Forest Hills Pizza'
 'Five Guys' "Sculler's Green Room" 'Trattoria Newbury'
 'Derne Street Deli' 'dbar' 'Boston and Maine Fish Co.'
 "Kelly's Cork N Bull\n- CLOSED" 'Sheesha Lounge' 'Van Shabu & Bar'
 'Cafe Gigu' 'Mamma Maria' 'Black Rose' 'Chau Chow City Restaurant'
 'The Mission Bar & Grill' "Mario's Sub & Salads" 'Ashburton Cafe'
 'Blue Inc' "Dominic's Restaurant\n- CLOSED" 'Amory Restaurant'
