In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Data Manipulation
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier, NearestNeighbors, KNeighborsRegressor
from sklearn.metrics.pairwise import haversine_distances

# Load Dataset:

In [17]:
orgin_resturant_dataset = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Yumspeak/merged_dataset.csv')

Data Inspection:

In [30]:
orgin_resturant_dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 619630 entries, 0 to 619629
Data columns (total 23 columns):
 #   Column                               Non-Null Count   Dtype  
---  ------                               --------------   -----  
 0   place_id                             619630 non-null  object 
 1   name                                 619630 non-null  object 
 2   reviews                              619630 non-null  int64  
 3   main_category                        619630 non-null  object 
 4   main_rating                          619630 non-null  float64
 5   address                              619630 non-null  object 
 6   link                                 619630 non-null  object 
 7   review_photos                        619630 non-null  object 
 8   cuisine                              619630 non-null  object 
 9   latitude                             619630 non-null  float64
 10  longtitude                           619630 non-null  float64
 11  full_postal_c

In [47]:
orgin_resturant_dataset.drop_duplicates(subset=['place_id'], inplace = True)

## Model 1: Normal KNN with basic coordinate search only

Target & feature selection:

In [112]:
# 'main_rating': 'best', 'review': popular, 'n_neighbors': multi values
firstMod_feature_dataset = orgin_resturant_dataset[['latitude', 'longtitude']]
firstMod_target_dataset = orgin_resturant_dataset['main_category']

## Model: *KNeighborsClassifier*

In [113]:
def KNN_norm_cord_search(test_coordinate, search_area = 3):
  KNN = KNeighborsClassifier(metric='haversine')
  KNN.fit(firstMod_feature_dataset.values, firstMod_target_dataset)
  neighbors = KNN.kneighbors(test_coordinate, n_neighbors=search_area, return_distance=False)
  return neighbors

In [148]:
# Test coordinate
test_coordinate = [[1.284495, 103.852716]]
test_searchresult = 5

# Show test coordinates
KNN_result_resturant_dataset = orgin_resturant_dataset.iloc[KNN_norm_cord_search(test_coordinate, test_searchresult)[0]][['name', 'reviews', 'main_category', 'main_rating',
       'address','cuisine', 'latitude', 'longtitude']]
for index, row in KNN_result_resturant_dataset.iterrows():
    print(f"Name: {row['name']}, Address: {row['address']}")
    print(f"You are {haversine_distance(test_coordinate[0][0], test_coordinate[0][1], row['latitude'], row['longtitude'])}m away from the location.\n")
KNN_result_resturant_dataset

Name: Taki Izakaya Bar, Address: 16 Collyer Quay, #01-05A B, Singapore 049318
You are 28.695288104922874m away from the location.

Name: Pasta e Formaggio(P&F) @ CIMB Plaza, Address: 16 Collyer Quay, #01-13 Centre, Singapore 049318
You are 41.76874636492119m away from the location.

Name: Tanamera Coffee CIMB Plaza, Address: 30 Raffles Pl, #01 - 12 / 13, Singapore 048622
You are 75.43466239966698m away from the location.

Name: The Fullerton Pavillion, Address: 82 Collyer Quay, Singapore 049213
You are 139.10741070442836m away from the location.

Name: Dungeon by IL Fiore, Address: 30 Raffles Pl, #01-16/17 CIMB Plaza, Singapore 048622
You are 56.62468155525152m away from the location.



Unnamed: 0,name,reviews,main_category,main_rating,address,cuisine,latitude,longtitude
82708,Taki Izakaya Bar,250,Modern izakaya,4.4,"16 Collyer Quay, #01-05A B, Singapore 049318",Japanese,1.284237,103.85271
287893,Pasta e Formaggio(P&F) @ CIMB Plaza,172,Italian,4.4,"16 Collyer Quay, #01-13 Centre, Singapore 049318",Italian,1.284234,103.852445
55704,Tanamera Coffee CIMB Plaza,269,Cafe,4.4,"30 Raffles Pl, #01 - 12 / 13, Singapore 048622",Cafe,1.284197,103.852107
157792,The Fullerton Pavillion,211,Fusion,4.6,"82 Collyer Quay, Singapore 049213",Fusion,1.284579,103.853965
490576,Dungeon by IL Fiore,166,Italian,4.4,"30 Raffles Pl, #01-16/17 CIMB Plaza, Singapore...",Italian,1.284107,103.852386


## Model: *NearestNeighbors*

In [59]:
def NN_norm_cord_search(coordinate, search_area = 3):
  NN = NearestNeighbors(metric='haversine')
  NN.fit(firstMod_feature_dataset.values, firstMod_target_dataset)
  neighbors = NN.kneighbors(coordinate, n_neighbors=search_area, return_distance=False)
  return neighbors

In [147]:
# Test coordinate
test_coordinate = [[1.306310, 103.831698]]
test_searchresult = 10

NN_result_resturant_dataset = orgin_resturant_dataset.iloc[NN_norm_cord_search(test_coordinate, test_searchresult)[0]][['name', 'main_category', 'address','cuisine', 'main_rating','reviews',
       'latitude', 'longtitude']]
for index, row in NN_result_resturant_dataset.iterrows():
    print(f"Name: {row['name']}, Address: {row['address']}")
    print(f"You are {haversine_distance(test_coordinate[0][0], test_coordinate[0][1], row['latitude'], row['longtitude'])} m away from the location.\n")
NN_result_resturant_dataset

Name: Mui Kee Orchard, Address: 1 Scotts Rd, #03-09 Shaw Centre, Singapore 228208
You are 18.453192168475475 m away from the location.

Name: Picolino Orchard, Address: 1 Scotts Rd, #03-23/24 Shaw Centre, Singapore 228208
You are 18.453192168475475 m away from the location.

Name: Sushi Jin, Address: 1 Scotts Rd, #01-11 Shaw Centre, Singapore 228208
You are 18.453192168475475 m away from the location.

Name: 8 Korean BBQ (Shaw Centre) | Korean Bbq Restaurant in Orchard, Address: 1 Scotts Rd, #04 - 20 / 21, Singapore 228208
You are 16.67190943114462 m away from the location.

Name: La Tapería, Address: 1 Scotts Rd, #02 - 10 / 11, Singapore 228208
You are 23.75514552527607 m away from the location.

Name: Bistro Du Vin, Address: 1 Scotts Rd, #01 - 14 Shaw Centre, Singapore 228208
You are 26.76722956143051 m away from the location.

Name: Xi Yan@Shaw, Address: 1 Scotts Rd, #03 - 12 / 13, Singapore 228208
You are 35.58767540641924 m away from the location.

Name: Happy Lamb Hot Pot 快乐小羊火锅 

Unnamed: 0,name,main_category,address,cuisine,main_rating,reviews,latitude,longtitude
253031,Mui Kee Orchard,Cantonese,"1 Scotts Rd, #03-09 Shaw Centre, Singapore 228208",Chinese,4.4,1554,1.306343,103.831861
301321,Picolino Orchard,Italian,"1 Scotts Rd, #03-23/24 Shaw Centre, Singapore ...",Italian,4.3,815,1.306343,103.831861
77377,Sushi Jin,Japanese,"1 Scotts Rd, #01-11 Shaw Centre, Singapore 228208",Japanese,4.4,426,1.306343,103.831861
318855,8 Korean BBQ (Shaw Centre) | Korean Bbq Restau...,Korean,"1 Scotts Rd, #04 - 20 / 21, Singapore 228208",Korean,4.3,658,1.306241,103.831831
216669,La Tapería,Spanish,"1 Scotts Rd, #02 - 10 / 11, Singapore 228208",Spanish,4.4,788,1.306523,103.83169
383109,Bistro Du Vin,French,"1 Scotts Rd, #01 - 14 Shaw Centre, Singapore 2...",French,4.4,682,1.306525,103.83159
142075,Xi Yan@Shaw,Chinese,"1 Scotts Rd, #03 - 12 / 13, Singapore 228208",Chinese,4.2,343,1.306602,103.831566
587516,Happy Lamb Hot Pot 快乐小羊火锅 - Orchard,Buffet,"9 Scotts Rd, #02-10 Pacific Plaza, Singapore 2...",Buffet,4.7,1662,1.306598,103.832087
128155,VietSmith - Pacific Plaza,Vietnamese,"9 Scotts Rd, #01-09/10/11/12 Pacific Plaza, Si...",Asian,4.6,158,1.306598,103.832087
291262,"Leckerbaer (Isetan, Shaw House)",Cafe,"350 Orchard Road, #01-K2 & #01-K3, Orchard, 23...",Cafe,3.8,98,1.305907,103.831498


---

# Model 2: KNN with rating search only

In [135]:
# Prepare feature dataset and target dataset
secondMod_feature_dataset = orgin_resturant_dataset[['latitude','longtitude']]
secondMod_target_dataset = orgin_resturant_dataset[['main_rating']]

In [None]:
# Encode 'main_category'
# Standard_Scaler = StandardScaler()
# secondMod_target_dataset['main_rating'] = Standard_Scaler.fit_transform(secondMod_target_dataset)

In [138]:
def norm_rating_cord_search(test_coordinate, search_area = 3):
  KNN_rating = KNeighborsRegressor(metric='haversine')
  KNN_rating.fit(secondMod_feature_dataset.values, secondMod_target_dataset)
  neighbors_distances_rating, neighbors_indices_rating = KNN_rating.kneighbors(test_coordinate, n_neighbors=search_area)
  return neighbors_indices_rating

In [139]:
test_coordinate = [[1.284495, 103.852716]]
test_searchresult = 5

# norm_rating_cord_search(test_coordinate)
modtwo_resturant_dataset = orgin_resturant_dataset.iloc[norm_rating_cord_search(test_coordinate, test_searchresult)[0]][['name', 'main_category', 'address','cuisine', 'main_rating','reviews',
       'latitude', 'longtitude']].sort_values(by='main_rating', ascending=False)
modtwo_resturant_dataset

Unnamed: 0,name,main_category,address,cuisine,main_rating,reviews,latitude,longtitude
157792,The Fullerton Pavillion,Fusion,"82 Collyer Quay, Singapore 049213",Fusion,4.6,211,1.284579,103.853965
82708,Taki Izakaya Bar,Modern izakaya,"16 Collyer Quay, #01-05A B, Singapore 049318",Japanese,4.4,250,1.284237,103.85271
287893,Pasta e Formaggio(P&F) @ CIMB Plaza,Italian,"16 Collyer Quay, #01-13 Centre, Singapore 049318",Italian,4.4,172,1.284234,103.852445
55704,Tanamera Coffee CIMB Plaza,Cafe,"30 Raffles Pl, #01 - 12 / 13, Singapore 048622",Cafe,4.4,269,1.284197,103.852107
490576,Dungeon by IL Fiore,Italian,"30 Raffles Pl, #01-16/17 CIMB Plaza, Singapore...",Italian,4.4,166,1.284107,103.852386


# Model 3: KNN with reviews search only

In [25]:
# Prepare feature dataset and target dataset
thirdMod_feature_dataset = orgin_resturant_dataset[['latitude','longtitude']]
thirdMod_target_dataset = orgin_resturant_dataset[['reviews']]

In [26]:
def norm_rating_cord_search(test_coordinate, search_area = 3):
  KNN_rating = KNeighborsRegressor(metric='haversine')
  KNN_rating.fit(secondMod_feature_dataset, secondMod_target_dataset)
  neighbors_distances_rating, neighbors_indices_rating = KNN_rating.kneighbors(test_coordinate, n_neighbors=search_area)
  return neighbors_indices_rating

In [27]:
test_coordinate = [[1.284495, 103.852716]]
test_searchresult = 5

# norm_rating_cord_search(test_coordinate)
modtwo_resturant_dataset = orgin_resturant_dataset.iloc[norm_rating_cord_search(test_coordinate, test_searchresult)[0]].sort_values(by='reviews', ascending=False)
modtwo_resturant_dataset



Unnamed: 0,place_id,name,reviews,main_category,main_rating,address,link,review_photos,cuisine,latitude,...,district_code,region,rating,review_text,published_at_date,review_likes_count,response_from_owner_text,total_number_of_reviews_by_reviewer,total_number_of_photos_by_reviewer,is_local_guide
82709,ChIJCz-TrIoZ2jERBlG8g0v-5Nc,Taki Izakaya Bar,250,Modern izakaya,4.4,"16 Collyer Quay, #01-05A B, Singapore 049318",https://www.google.com/maps/place/Taki+Izakaya...,['https://lh5.googleusercontent.com/p/AF1QipOI...,Japanese,1.284237,...,1,City,5,We held a corporate event at Taki and the team...,2024-08-02,0,,4.0,0.0,False
82712,ChIJCz-TrIoZ2jERBlG8g0v-5Nc,Taki Izakaya Bar,250,Modern izakaya,4.4,"16 Collyer Quay, #01-05A B, Singapore 049318",https://www.google.com/maps/place/Taki+Izakaya...,['https://lh5.googleusercontent.com/p/AF1QipOI...,Japanese,1.284237,...,1,City,5,"Love the Japanese elements, where there are be...",2024-07-30,0,,22.0,105.0,False
82711,ChIJCz-TrIoZ2jERBlG8g0v-5Nc,Taki Izakaya Bar,250,Modern izakaya,4.4,"16 Collyer Quay, #01-05A B, Singapore 049318",https://www.google.com/maps/place/Taki+Izakaya...,['https://lh5.googleusercontent.com/p/AF1QipOI...,Japanese,1.284237,...,1,City,2,Disappointed at the food quality after reading...,2024-07-30,0,,39.0,73.0,True
82708,ChIJCz-TrIoZ2jERBlG8g0v-5Nc,Taki Izakaya Bar,250,Modern izakaya,4.4,"16 Collyer Quay, #01-05A B, Singapore 049318",https://www.google.com/maps/place/Taki+Izakaya...,['https://lh5.googleusercontent.com/p/AF1QipOI...,Japanese,1.284237,...,1,City,5,Love the tori karaage! Nice nanban sauce :) gr...,2024-08-16,0,,11.0,40.0,False
82710,ChIJCz-TrIoZ2jERBlG8g0v-5Nc,Taki Izakaya Bar,250,Modern izakaya,4.4,"16 Collyer Quay, #01-05A B, Singapore 049318",https://www.google.com/maps/place/Taki+Izakaya...,['https://lh5.googleusercontent.com/p/AF1QipOI...,Japanese,1.284237,...,1,City,5,Was there on a Friday night with my hubby. Lov...,2024-08-02,1,,12.0,66.0,False


# Distance to location:

In [140]:
import math

def haversine_distance(lat1, lon1, lat2, lon2):
    # Radius of the Earth in kilometers
    R = 6371.0

    # Convert latitude and longitude from degrees to radians
    lat1_rad = math.radians(lat1)
    lon1_rad = math.radians(lon1)
    lat2_rad = math.radians(lat2)
    lon2_rad = math.radians(lon2)

    # Differences in coordinates
    dlat = lat2_rad - lat1_rad
    dlon = lon2_rad - lon1_rad

    # Haversine formula
    a = math.sin(dlat / 2) ** 2 + math.cos(lat1_rad) * math.cos(lat2_rad) * math.sin(dlon / 2) ** 2
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))

    # Distance in kilometers
    distance = R * c * 1000

    return distance