In [1]:
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
import numpy as np
import pandas as pd

# Load the three pipe-delimited tables (ratings, users, venues) in one pass;
# the generator is consumed left to right, so the files are read in this order.
ratingsdf, userdf, venuedf = (
    pd.read_csv(csv_path, sep='|')
    for csv_path in ("data/rating.csv", "data/user.csv", "data/venue.csv")
)

In [5]:
# Build the id <-> matrix-index lookups and the two sparse rating matrices.
#
# np.unique returns the sorted distinct ids and, with return_inverse=True, the
# position of every rating row's id inside that sorted array. One call per
# column therefore yields the lookup tables AND the row/column indices, so
# N/M, the mappers and the matrix shapes are all derived from the same pass
# (no repeated sorting, no per-row Python dict lookups).
user_ids, user_index = np.unique(ratingsdf["userid"].to_numpy(), return_inverse=True)
venue_ids, venue_index = np.unique(ratingsdf["venueid"].to_numpy(), return_inverse=True)

N = len(user_ids)    # distinct users appearing in the ratings
M = len(venue_ids)   # distinct venues appearing in the ratings

# number of neighbours to report for the target venue / user
k = 10

# Map ids to indices
user_mapper = dict(zip(user_ids, range(N)))
venue_mapper = dict(zip(venue_ids, range(M)))

# Map indices to ids
user_inv_mapper = dict(enumerate(user_ids))
venue_inv_mapper = dict(enumerate(venue_ids))

# Create the rating matrices with scipy CSR: venueMatrix has one row per venue
# (columns are users), userMatrix is the same data with one row per user.
# NOTE(review): the CSR constructor sums entries that share the same
# (row, col), so a user who rated one venue several times contributes the sum
# of those scores -- confirm that is intended, or de-duplicate ratingsdf first.
venueMatrix = csr_matrix((ratingsdf["score"], (venue_index, user_index)), shape=(M, N))
userMatrix = csr_matrix((ratingsdf["score"], (user_index, venue_index)), shape=(N, M))

def _neighbour_ids(found, target_ind, inv_mapper, limit):
    """Translate kNN hits for one query row into original ids, minus the target.

    Args:
        found: 2-D index array returned by ``kneighbors`` for a single query row.
        target_ind: matrix row index of the item that was used as the query.
        inv_mapper: dict mapping a matrix row index back to the original id.
        limit: maximum number of ids to return.

    Returns:
        List of at most ``limit`` ids, in the order the hits were returned.

    The target is removed by index, not by position: when other rows are at
    the same cosine distance as the target itself (e.g. identical rating
    directions), the target is not guaranteed to be the first hit, so dropping
    element 0 could discard a real neighbour and keep the target in the list.
    """
    hits = [int(ind) for ind in found[0] if ind != target_ind]
    return [inv_mapper[ind] for ind in hits[:limit]]


# target venue
venues_name = dict(zip(venuedf['venueid'], venuedf['name']))
venue_id = 862

# target user
users_name = dict(zip(userdf['userid'], userdf['name']))
user_id = 3

# translate the target ids into matrix row indices
venue_ind = venue_mapper[venue_id]
user_ind = user_mapper[user_id]

# rating rows of the targets, shaped as a single-row query
venue_vec = venueMatrix[venue_ind].reshape(1, -1)
user_vec = userMatrix[user_ind].reshape(1, -1)

# Ask for one extra neighbour because the target row is normally returned
# among its own nearest neighbours. k stays incremented on purpose: the
# report code that follows prints k-1 as the number of neighbours.
k += 1

# brute-force cosine kNN over venues (rows of venueMatrix)
kNN_v = NearestNeighbors(n_neighbors=k, algorithm="brute", metric='cosine')
kNN_v.fit(venueMatrix)

# brute-force cosine kNN over users (rows of userMatrix)
kNN_u = NearestNeighbors(n_neighbors=k, algorithm="brute", metric='cosine')
kNN_u.fit(userMatrix)

neighbour_of_venue = kNN_v.kneighbors(venue_vec, return_distance=False)
neighbour_of_user = kNN_u.kneighbors(user_vec, return_distance=False)

# drop the target itself and keep at most the k-1 neighbours originally requested
neighbour_venue_ids = _neighbour_ids(neighbour_of_venue, venue_ind, venue_inv_mapper, k - 1)
neighbour_user_ids = _neighbour_ids(neighbour_of_user, user_ind, user_inv_mapper, k - 1)

def _print_neighbours(heading, neighbour_ids, names_by_id):
    """Print a heading, one "<id> <name>" line per neighbour, then a rule."""
    print(heading)
    for neighbour_id in neighbour_ids:
        print(str(neighbour_id) + " " + names_by_id[neighbour_id])
    print("------------------------------------------------------------------------")


# Report the venue neighbours first, then the user neighbours, in the same layout.
venue_name = venues_name[venue_id]
_print_neighbours(f"{k-1} nearlest of venue {venue_name}", neighbour_venue_ids, venues_name)

user_name = users_name[user_id]
_print_neighbours(f"{k-1} nearlest of user {user_name}", neighbour_user_ids, users_name)


10 nearlest of venue พิพิธภัณฑ์สัตว์น้ำจังหวัดหนองคาย
1209 เซ็นทรัล อุดรธานี
3733 สวนสาธารณะ หนองประจักษ์
4092 สวนสัตว์ขอนแก่น
969 ตลาดท่าเสด็จ
4106 TUKCOM LANDMARK UDONTHANI Shopping Plaza
1315 เซ็นทรัล ขอนแก่น
1169 ศูนย์การค้ายูดี ทาวน์
1071 บิ๊กซี อุดรธานี 1
6011 ศูนย์วัฒนธรรมไทย-จีน อุดรธานี
1372 โลตัส อุดรธานี 1
------------------------------------------------------------------------
10 nearlest of user Tuk Phuengthai
1396 ทรงศักดิ์ ปฤษณานนท์
6939 หน้ารัก คน
4236 Aod Malaew
4760 KEATTISAK .R
281 เสาวลักษณ์ แก้วกํากง
4119 Tuanchai Satipa
2875 张毅群
7 ธงชัย ติระพรชัย
1037 TheeR-8 ทําเองบ้าง
3017 Mitapanee Charunnapha
------------------------------------------------------------------------
