# Bangkok Tourist Recommender System

In [60]:
import os

import folium
import geocoder
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from sklearn.cluster import KMeans

## 1. Data Scraping

In [61]:
# Download the Wikipedia list of Bangkok districts. The timeout keeps a
# stalled connection from hanging the kernel, and raise_for_status() stops
# the notebook here instead of silently parsing an HTTP error page.
html = requests.get('https://en.wikipedia.org/wiki/List_of_districts_of_Bangkok', timeout=30)
html.raise_for_status()
soup = BeautifulSoup(html.content, 'html.parser')

In [62]:
# Locate the districts table on the scraped page
table = soup.find('table', class_='wikitable sortable')
if table is None:
    raise ValueError("Could not find the 'wikitable sortable' districts table on the page")

# Collect one record per table row (the first <tr> is the header) and build
# the dataframe once at the end instead of growing it cell by cell.
district_records = []
for tr in table.find_all('tr')[1:]:
    tds = tr.find_all('td')
    if len(tds) < 8:
        # Rows without the latitude/longitude cells (index 6 and 7) cannot be used
        continue
    district_records.append({
        'Neighborhood': tds[0].text.strip(),
        'Latitude': tds[6].text.strip(),
        'Longitude': tds[7].text.strip(),
    })

df_bangkok = pd.DataFrame(district_records, columns=['Neighborhood', 'Latitude', 'Longitude'])

# Store coordinates as numbers rather than text so they can be used directly
# for API queries and map markers.
df_bangkok['Latitude'] = pd.to_numeric(df_bangkok['Latitude'])
df_bangkok['Longitude'] = pd.to_numeric(df_bangkok['Longitude'])

Print out the scraped data as a dataframe:

In [63]:
# Preview the first rows of the scraped district table
df_bangkok.head()

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Bang Bon,13.6592,100.3991
1,Bang Kapi,13.765833,100.647778
2,Bang Khae,13.696111,100.409444
3,Bang Khen,13.873889,100.596389
4,Bang Kho Laem,13.693333,100.5025


## 2. Neighborhoods Exploration By Foursquare API 

In [64]:
# Define Foursquare API credentials.
# Secrets are read from the environment so they are never committed with the
# notebook; export FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before
# starting the kernel. An unset variable yields an empty string, in which case
# the Foursquare requests below will be rejected.
# NOTE(review): the key pair that used to be hard-coded here has been
# published with this file and should be revoked/rotated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

In [65]:
def getNearbyVenues(names, latitudes, longitudes, radius=1000, timeout=10):
    """Query the Foursquare "explore" endpoint around each neighborhood.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Parallel sequences describing each neighborhood; they are walked
        together with ``zip``.
    radius : int, optional
        Forwarded unchanged as the ``radius`` query parameter.
    timeout : float, optional
        Seconds to wait for each HTTP request before giving up.

    Returns
    -------
    pandas.DataFrame
        One row per venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        The frame is empty (but still has these columns) when no venue
        was found.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    column_names = ['Neighborhood',
                    'Neighborhood Latitude',
                    'Neighborhood Longitude',
                    'Venue',
                    'Venue Latitude',
                    'Venue Longitude',
                    'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):

        # Let requests build and escape the query string instead of
        # formatting credentials into the URL by hand.
        query = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }
        response = requests.get('https://api.foursquare.com/v2/venues/explore',
                                params=query,
                                timeout=timeout)
        response.raise_for_status()

        groups = response.json().get('response', {}).get('groups', [])
        items = groups[0]['items'] if groups else []

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            # Some venues carry no category at all; record them with a
            # missing category rather than failing on categories[0].
            categories = venue.get('categories') or []
            category_name = categories[0]['name'] if categories else None
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                category_name))

    # Passing the column names to the constructor keeps the schema intact
    # even when there are no rows at all.
    return pd.DataFrame(venue_rows, columns=column_names)

In [66]:
# Collect the venues surrounding every district centre
bangkok_venues = getNearbyVenues(
    names=df_bangkok['Neighborhood'],
    latitudes=df_bangkok['Latitude'],
    longitudes=df_bangkok['Longitude'],
)

In [67]:
# Discard entries whose category is literally 'Neighborhood'
bangkok_venues = bangkok_venues[bangkok_venues['Venue Category'].ne('Neighborhood')]

## 3. Analyze Each Neighborhood

In [68]:
# One indicator column per venue category
category_dummies = pd.get_dummies(bangkok_venues[['Venue Category']], prefix="", prefix_sep="")
cat_columns = category_dummies.columns

# Re-attach the neighborhood of each venue and put it in front of the
# category columns
fixed_columns = ['Neighborhood', *cat_columns]
bangkok_onehot = category_dummies.assign(Neighborhood=bangkok_venues['Neighborhood'])
bangkok_onehot = bangkok_onehot.loc[:, fixed_columns]

bangkok_onehot.head()

Unnamed: 0,Neighborhood,Accessories Store,Airport Service,Airport Terminal,American Restaurant,Arcade,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,...,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Warehouse Store,Water Park,Whisky Bar,Wine Bar,Wings Joint,Yoga Studio,Zoo,Zoo Exhibit
0,Bang Bon,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Bang Bon,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Bang Bon,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Bang Bon,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Bang Bon,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Examine the dataframe size

Group rows by neighborhood and sum the number of venues in each category

In [69]:
# Venue counts per category, one row per neighborhood
bangkok_grouped = bangkok_onehot.groupby('Neighborhood', as_index=False).sum()
bangkok_grouped

Unnamed: 0,Neighborhood,Accessories Store,Airport Service,Airport Terminal,American Restaurant,Arcade,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,...,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Warehouse Store,Water Park,Whisky Bar,Wine Bar,Wings Joint,Yoga Studio,Zoo,Zoo Exhibit
0,Bang Bon,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
1,Bang Kapi,0,0,0,0,0,0,0,0,1,...,0,1,1,1,0,0,1,0,0,0
2,Bang Khae,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
3,Bang Khen,0,0,0,0,0,0,0,0,3,...,0,2,0,0,0,0,0,0,0,0
4,Bang Kho Laem,0,0,0,0,0,0,0,0,1,...,0,1,0,0,0,0,0,0,0,0
5,Bang Khun Thian,0,0,0,0,0,0,0,0,3,...,0,0,0,0,0,0,0,0,0,0
6,Bang Na,0,0,0,1,0,0,0,0,2,...,0,0,0,0,0,0,0,0,0,0
7,Bang Phlat,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
8,Bang Rak,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
9,Bang Sue,0,0,0,0,0,0,0,0,3,...,0,0,0,0,0,0,0,0,0,0


Print each neighborhood along with the top 5 most common venues

In [70]:
num_top_venues = 5

for hood in bangkok_grouped['Neighborhood']:
    print("----"+hood+"----")
    # Take this neighborhood's row and flip it so categories run down the rows
    temp = bangkok_grouped.loc[bangkok_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue', 'freq']
    # The first transposed row is the neighborhood name, not a category count
    temp = temp.iloc[1:]
    temp = temp.assign(freq=temp['freq'].astype(float)).round({'freq': 2})
    ranked = temp.sort_values('freq', ascending=False).reset_index(drop=True)
    print(ranked.head(num_top_venues))
    print('\n')

----Bang Bon----
                venue  freq
0        Noodle House   4.0
1   Convenience Store   3.0
2     Thai Restaurant   3.0
3         Coffee Shop   2.0
4  Som Tum Restaurant   1.0


----Bang Kapi----
                 venue  freq
0          Coffee Shop  10.0
1  Japanese Restaurant   7.0
2   Som Tum Restaurant   6.0
3         Noodle House   6.0
4      Thai Restaurant   5.0


----Bang Khae----
                 venue  freq
0    Convenience Store   6.0
1         Noodle House   5.0
2  Japanese Restaurant   4.0
3        Shopping Mall   2.0
4                Diner   1.0


----Bang Khen----
                  venue  freq
0     Convenience Store   6.0
1           Coffee Shop   6.0
2  Fast Food Restaurant   5.0
3     Hotpot Restaurant   3.0
4             Bookstore   3.0


----Bang Kho Laem----
                venue  freq
0         Coffee Shop   7.0
1        Noodle House   7.0
2   Convenience Store   6.0
3     Thai Restaurant   6.0
4  Seafood Restaurant   4.0


----Bang Khun Thian----
         

               venue  freq
0        Coffee Shop   7.0
1  Convenience Store   7.0
2    Thai Restaurant   5.0
3         Restaurant   4.0
4       Noodle House   4.0


----Watthana----
                 venue  freq
0          Coffee Shop  11.0
1                 Café  10.0
2  Japanese Restaurant   7.0
3         Noodle House   6.0
4                  Bar   5.0


----Yan Nawa----
                 venue  freq
0  Japanese Restaurant   9.0
1      Thai Restaurant   9.0
2    Convenience Store   6.0
3                 Café   5.0
4          Coffee Shop   5.0




Define a function to sort the venues in descending order

In [71]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the largest entries of ``row``.

    The first element of ``row`` is skipped (it holds the neighborhood
    name); the remaining values are ranked from largest to smallest and the
    index labels of the first ``num_top_venues`` of them are returned as an
    array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index[:num_top_venues].values

Create the new dataframe and display the top 10 venues for each neighborhood

In [110]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues: 1st, 2nd, 3rd, then "th".
# The suffix is chosen with an explicit bounds check rather than by catching
# every possible exception from the list lookup.
columns = ['Neighborhood']
for ind in range(num_top_venues):
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# Rank the categories of every neighborhood, then build the table in one go
# instead of filling an empty dataframe row by row.
top_venues_per_hood = [
    return_most_common_venues(bangkok_grouped.iloc[pos, :], num_top_venues)
    for pos in range(bangkok_grouped.shape[0])
]

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(top_venues_per_hood,
                                           columns=columns[1:],
                                           index=bangkok_grouped.index)
neighborhoods_venues_sorted.insert(0, 'Neighborhood', bangkok_grouped['Neighborhood'])

#neighborhoods_venues_sorted

## 4. Cluster Neighborhoods

Run _k_-means to cluster the neighborhoods into 4 clusters.

In [111]:
# set number of clusters
kclusters = 4

# Feature matrix: the per-category counts without the neighborhood label.
# The `columns=` keyword is used because newer pandas releases no longer
# accept the axis as a second positional argument to drop().
bangkok_grouped_clustering = bangkok_grouped.drop(columns='Neighborhood')

# run k-means clustering; n_init is pinned so the result does not depend on
# which scikit-learn default happens to be installed
kmeans = KMeans(n_clusters=kclusters, n_init=10, random_state=0).fit(bangkok_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_

array([3, 0, 3, 0, 0, 0, 3, 3, 2, 1, 1, 1, 3, 0, 3, 3, 3, 1, 2, 0, 3, 2,
       3, 0, 3, 0, 3, 3, 3, 1, 0, 2, 0, 2, 1, 3, 0, 2, 3, 2, 0, 1, 1, 3,
       3, 1, 0, 0, 2, 0])

Create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

In [112]:
# add clustering labels as the first column; a label column left over from a
# previous run of this cell is removed first so the cell can be re-executed
if 'Cluster Labels' in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted = neighborhoods_venues_sorted.drop(columns='Cluster Labels')
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# attach the cluster label and top venues to the latitude/longitude of each
# Bangkok district (districts are matched on their name)
bangkok_merged = (
    df_bangkok
    .join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')
    .reset_index(drop=True)
)
#bangkok_merged.head() # check the last columns!

Finally, let's visualize the resulting clusters

In [113]:
# create map centred on Bangkok
map_clusters = folium.Map(location=[13.736717, 100.523186], zoom_start=10)

# set color scheme for the clusters: one evenly spaced rainbow color each
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(bangkok_merged['Latitude'], bangkok_merged['Longitude'], bangkok_merged['Neighborhood'], bangkok_merged['Cluster Labels']):
    # A district that found no match in the venue table has no label
    if pd.isna(cluster):
        continue
    # Labels may have been promoted to float by the join; index with an int,
    # and use the label itself as the color index (no negative wraparound)
    cluster = int(cluster)
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [float(lat), float(lon)],  # scraped coordinates may still be text
        radius=5,
        popup=label,
        color=rainbow[cluster],
        fill=True,
        fill_color=rainbow[cluster],
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

In [114]:
# Cluster 0: Cafe & Shopping Mall
neighborhoods_venues_sorted.loc[neighborhoods_venues_sorted['Cluster Labels'].eq(0)]

Unnamed: 0,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,0,Bang Kapi,Coffee Shop,Japanese Restaurant,Som Tum Restaurant,Noodle House,Thai Restaurant,Clothing Store,Fast Food Restaurant,Hotpot Restaurant,Dessert Shop,Shabu-Shabu Restaurant
3,0,Bang Khen,Coffee Shop,Convenience Store,Fast Food Restaurant,Som Tum Restaurant,Asian Restaurant,Bookstore,Noodle House,Hotpot Restaurant,Pub,Steakhouse
4,0,Bang Kho Laem,Coffee Shop,Noodle House,Thai Restaurant,Convenience Store,Seafood Restaurant,Chinese Restaurant,Pub,Bakery,Hotel,Supermarket
5,0,Bang Khun Thian,Coffee Shop,Ice Cream Shop,Japanese Restaurant,Thai Restaurant,Bar,Restaurant,Asian Restaurant,Steakhouse,Pizza Place,Clothing Store
13,0,Chatuchak,Coffee Shop,Thai Restaurant,Fast Food Restaurant,Japanese Restaurant,Flea Market,Noodle House,Ice Cream Shop,Gym / Fitness Center,Hotel,Multiplex
19,0,Khan Na Yao,Thai Restaurant,Coffee Shop,Noodle House,Japanese Restaurant,Ice Cream Shop,Som Tum Restaurant,Pizza Place,Bakery,Bookstore,BBQ Joint
23,0,Lak Si,Coffee Shop,Thai Restaurant,Convenience Store,Fast Food Restaurant,Japanese Restaurant,Gym / Fitness Center,Bar,Steakhouse,Dumpling Restaurant,Food Court
25,0,Lat Phrao,Noodle House,Coffee Shop,Convenience Store,Som Tum Restaurant,Café,BBQ Joint,Thai Restaurant,Restaurant,Asian Restaurant,Steakhouse
30,0,Phasi Charoen,Convenience Store,Thai Restaurant,Coffee Shop,BBQ Joint,Japanese Restaurant,Fast Food Restaurant,Hotpot Restaurant,Shabu-Shabu Restaurant,Noodle House,Steakhouse
32,0,Phra Khanong,Convenience Store,Noodle House,Coffee Shop,Hotel,Fast Food Restaurant,Ice Cream Shop,Italian Restaurant,Chinese Restaurant,Hotpot Restaurant,Café


In [115]:
# Cluster 1: Thai & Asian Food
neighborhoods_venues_sorted.loc[neighborhoods_venues_sorted['Cluster Labels'].eq(1)]

Unnamed: 0,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
9,1,Bang Sue,Noodle House,Thai Restaurant,Convenience Store,Train Station,Coffee Shop,Ice Cream Shop,Asian Restaurant,Hotpot Restaurant,Fast Food Restaurant,Restaurant
10,1,Bangkok Noi,Noodle House,Som Tum Restaurant,Convenience Store,Café,Asian Restaurant,Dim Sum Restaurant,Park,Coffee Shop,Food Truck,Mobile Phone Shop
11,1,Bangkok Yai,Noodle House,Asian Restaurant,Convenience Store,BBQ Joint,Train Station,Coffee Shop,Dessert Shop,Farmers Market,Restaurant,Other Nightlife
17,1,Dusit,Noodle House,Thai Restaurant,Convenience Store,Asian Restaurant,Som Tum Restaurant,Coffee Shop,Palace,Café,Chinese Restaurant,Italian Restaurant
29,1,Pathum Wan,Noodle House,Asian Restaurant,Seafood Restaurant,Thai Restaurant,Hotel,Dessert Shop,Gym / Fitness Center,Coffee Shop,Hostel,Chinese Restaurant
34,1,Pom Prap Sattru Phai,Noodle House,Thai Restaurant,Café,Convenience Store,Asian Restaurant,Chinese Restaurant,Hotel,Coffee Shop,Museum,Som Tum Restaurant
41,1,Sathon,Noodle House,Asian Restaurant,Convenience Store,Chinese Restaurant,Dessert Shop,Thai Restaurant,Bar,Bakery,Hotel,Park
42,1,Suan Luang,Noodle House,Coffee Shop,Asian Restaurant,Thai Restaurant,Convenience Store,Som Tum Restaurant,Massage Studio,Shopping Mall,Chinese Restaurant,Soccer Field
45,1,Thon Buri,Noodle House,Convenience Store,Thai Restaurant,Asian Restaurant,Train Station,Motorcycle Shop,Café,Supermarket,Seafood Restaurant,Shopping Mall


In [116]:
# Cluster 2: Nightlife & Hotel
neighborhoods_venues_sorted.loc[neighborhoods_venues_sorted['Cluster Labels'].eq(2)]

Unnamed: 0,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,2,Bang Rak,Noodle House,Hotel,Thai Restaurant,Chinese Restaurant,Café,Coffee Shop,Seafood Restaurant,Bar,Spa,Bakery
18,2,Huai Khwang,Thai Restaurant,Noodle House,Convenience Store,Japanese Restaurant,Asian Restaurant,Som Tum Restaurant,Hotel,Dessert Shop,BBQ Joint,Café
21,2,Khlong San,Coffee Shop,Noodle House,Dessert Shop,Chinese Restaurant,Café,Art Gallery,Thai Restaurant,Restaurant,Hotel Bar,Hotel
31,2,Phaya Thai,Thai Restaurant,Bar,Café,Coffee Shop,Japanese Restaurant,Restaurant,Sushi Restaurant,Noodle House,Bakery,Som Tum Restaurant
33,2,Phra Nakhon,Bar,Hotel,Thai Restaurant,Café,Noodle House,Asian Restaurant,Massage Studio,Hostel,Spa,Vegetarian / Vegan Restaurant
37,2,Ratchathewi,Noodle House,Coffee Shop,Hotel,Café,Hostel,Steakhouse,Japanese Restaurant,Thai Restaurant,Som Tum Restaurant,Massage Studio
39,2,Samphanthawong,Dessert Shop,Thai Restaurant,Coffee Shop,Art Gallery,Café,Bar,Hotel,Hotel Bar,Noodle House,Cocktail Bar
48,2,Watthana,Coffee Shop,Café,Japanese Restaurant,Noodle House,Thai Restaurant,Bar,Hotel,Shopping Mall,Asian Restaurant,Shabu-Shabu Restaurant


In [117]:
# Cluster 3: Suburb of Bangkok
neighborhoods_venues_sorted.loc[neighborhoods_venues_sorted['Cluster Labels'].eq(3)]

Unnamed: 0,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,3,Bang Bon,Noodle House,Convenience Store,Thai Restaurant,Coffee Shop,Asian Restaurant,Garden Center,Bar,Bistro,Chinese Restaurant,Gas Station
2,3,Bang Khae,Convenience Store,Noodle House,Japanese Restaurant,Shopping Mall,Auto Garage,Food & Drink Shop,Coffee Shop,Shop & Service,Miscellaneous Shop,Supermarket
6,3,Bang Na,Thai Restaurant,Noodle House,Asian Restaurant,Restaurant,Residential Building (Apartment / Condo),Market,Supermarket,Convenience Store,Seafood Restaurant,Resort
7,3,Bang Phlat,Convenience Store,Coffee Shop,Fast Food Restaurant,Pier,Sporting Goods Shop,Café,Seafood Restaurant,Cocktail Bar,Flea Market,Hotpot Restaurant
12,3,Bueng Kum,Coffee Shop,Café,Supermarket,Convenience Store,Flea Market,Park,Noodle House,Shop & Service,Thai Restaurant,Bakery
14,3,Chom Thong,Thai Restaurant,Convenience Store,Coffee Shop,Asian Restaurant,Dog Run,Bus Stop,Food Truck,Massage Studio,Toll Plaza,Flea Market
15,3,Din Daeng,Convenience Store,Noodle House,Sports Club,Thai Restaurant,Hotel,Bakery,Café,Mosque,Food Court,Chinese Restaurant
16,3,Don Mueang,Noodle House,Convenience Store,Restaurant,Bus Station,Café,Chinese Restaurant,Steakhouse,Seafood Restaurant,Sushi Restaurant,Taxi Stand
20,3,Khlong Sam Wa,Exhibit,Restaurant,Grocery Store,Thai Restaurant,Noodle House,Dessert Shop,Zoo,Japanese Restaurant,Coffee Shop,Convenience Store
22,3,Khlong Toei,Convenience Store,Coffee Shop,Bar,Boat or Ferry,Karaoke Bar,Tennis Court,Burger Joint,Food Truck,Bakery,Noodle House
