### 1. Import libraries

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

import json # library to handle JSON files

!pip install geopy
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

!pip install BeautifulSoup4
import requests # library to handle requests
from bs4 import BeautifulSoup # library to parse HTML and XML documents

from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library

print("Libraries imported.")

Collecting geopy
[?25l  Downloading https://files.pythonhosted.org/packages/80/93/d384479da0ead712bdaf697a8399c13a9a89bd856ada5a27d462fb45e47b/geopy-1.20.0-py2.py3-none-any.whl (100kB)
[K     |████████████████████████████████| 102kB 2.6MB/s ta 0:00:011
[?25hCollecting geographiclib<2,>=1.49 (from geopy)
  Downloading https://files.pythonhosted.org/packages/5b/ac/4f348828091490d77899bc74e92238e2b55c59392f21948f296e94e50e2b/geographiclib-1.49.tar.gz
Building wheels for collected packages: geographiclib
  Building wheel for geographiclib (setup.py) ... [?25ldone
[?25h  Stored in directory: /home/jupyterlab/.cache/pip/wheels/99/45/d1/14954797e2a976083182c2e7da9b4e924509e59b6e5c661061
Successfully built geographiclib
Installing collected packages: geographiclib, geopy
Successfully installed geographiclib-1.49 geopy-1.20.0
Collecting BeautifulSoup4
[?25l  Downloading https://files.pythonhosted.org/packages/1a/b7/34eec2fe5a49718944e215fde81288eec1fa04638aa3fb57c1c6cd0f98c3/beautifulsoup

### 2. Scrape the table from Wikipedia and transform the data into a pandas dataframe

In [2]:
# parse data from wikipedia into a BeautifulSoup object
source=requests.get('https://en.wikipedia.org/wiki/List_of_districts_of_Bangkok').text
soup=BeautifulSoup(source,'html.parser')

In [3]:
# create district list
district=[]
lat=[]
long=[]

In [4]:
# append the data into the respective lists
for row in soup.find('table').find_all('tr'):
    cells = row.find_all('td')
    if(len(cells) > 0):
        district.append(cells[0].text)
        lat.append(cells[5].text.rstrip('\n'))
        long.append(cells[6].text.rstrip('\n'))

In [5]:
# define the dataframe columns
column_names = ['District', 'Lat', 'Long'] 

# instantiate the dataframe
bk_df = pd.DataFrame(columns=column_names)

In [6]:
# input value into the dataframe
bk_df['District']=district
bk_df['Lat']=lat
bk_df['Long']=long
bk_df.head()

Unnamed: 0,District,Lat,Long
0,Bang Bon,,
1,Bang Kapi,13.765833,100.647778
2,Bang Khae,13.696111,100.409444
3,Bang Khen,13.873889,100.596389
4,Bang Kho Laem,13.693333,100.5025


### 3. Ignore cells with no Lat/Long

In [7]:
bk_df_clean=bk_df[bk_df.Lat!='NA'].reset_index(drop = True)

In [8]:
bk_df_clean.Lat = bk_df_clean.Lat.astype(float)
bk_df_clean.Long = bk_df_clean.Long.astype(float)
bk_df_clean.head()

Unnamed: 0,District,Lat,Long
0,Bang Kapi,13.765833,100.647778
1,Bang Khae,13.696111,100.409444
2,Bang Khen,13.873889,100.596389
3,Bang Kho Laem,13.693333,100.5025
4,Bang Khun Thian,13.660833,100.435833


In [9]:
bk_df_clean.shape

(45, 3)

### 4. Use geopy library to get the latitude and longitude values of Bangkok

In [10]:
address = 'Bangkok'

geolocator = Nominatim(user_agent="bk_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Bangkok are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Bangkok are 13.7538929, 100.8160803.


### 5. Create a map of Bangkok with district superimposed on top.

In [11]:
# create map of Bangkok using latitude and longitude values
map_bangkok = folium.Map(location=[latitude, longitude], zoom_start=10)

In [12]:
# add markers to map
for district, lat, long in zip(bk_df_clean['District'], bk_df_clean['Lat'], bk_df_clean['Long']):
    label = '{}'.format(district)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, long],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_bangkok)  
    
map_bangkok

### 6. Utilize the Foursquare API to explore the district and segment them

In [13]:
CLIENT_ID = '51RLBXXBPUTBKVSSH5AF1QCC5ZGMNWKLZ32WXZLKMLVE4HQ1' # your Foursquare ID
CLIENT_SECRET = '1SBPFHXLTPWUBXHAIYPE1MYXXCJHIP4CV3V05IISW2YXADAG' # your Foursquare Secret
VERSION = '20190912' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 51RLBXXBPUTBKVSSH5AF1QCC5ZGMNWKLZ32WXZLKMLVE4HQ1
CLIENT_SECRET:1SBPFHXLTPWUBXHAIYPE1MYXXCJHIP4CV3V05IISW2YXADAG


### 7. Get the top 100 venues that are in District within a radius of 500 meters.

In [14]:
radius=500
LIMIT=100

In [15]:
venues_list = []

In [16]:
for district, lat, long in zip(bk_df_clean['District'], bk_df_clean['Lat'], bk_df_clean['Long']):
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius, 
        LIMIT)
    
        # make the GET request
    results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
    venues_list.append([(
        district,
        lat, 
        long, 
        v['venue']['name'], 
        v['venue']['location']['lat'], 
        v['venue']['location']['lng'],  
        v['venue']['categories'][0]['name']) for v in results])

In [17]:
nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
nearby_venues.columns = ['District', 
                  'Lat', 
                  'Long', 
                  'Venue Name', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']

In [18]:
print (nearby_venues.shape)
nearby_venues.head()

(1000, 7)


Unnamed: 0,District,Lat,Long,Venue Name,Venue Latitude,Venue Longitude,Venue Category
0,Bang Kapi,13.765833,100.647778,Suan Son (สวนสน),13.76294,100.648895,Park
1,Bang Kapi,13.765833,100.647778,Tea Pe Ooh,13.762931,100.647936,Coffee Shop
2,Bang Kapi,13.765833,100.647778,ซ้งโภชนา ลูกชิ้นปลาทำเอง,13.761782,100.64801,Noodle House
3,Bang Kapi,13.765833,100.647778,ตลาดนัดแฟลตคลองจั่น,13.770105,100.649124,Flea Market
4,Bang Kapi,13.765833,100.647778,Baan Suan Massage (บ้านสวนมาสสาจ),13.765412,100.651192,Massage Studio


### 8. Check how many venues and unique categories are returned

In [19]:
nearby_venues.groupby(["District"]).count()

Unnamed: 0_level_0,Lat,Long,Venue Name,Venue Latitude,Venue Longitude,Venue Category
District,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Bang Kapi,21,21,21,21,21,21
Bang Khae,14,14,14,14,14,14
Bang Khen,10,10,10,10,10,10
Bang Kho Laem,17,17,17,17,17,17
Bang Khun Thian,39,39,39,39,39,39
Bang Na,10,10,10,10,10,10
Bang Phlat,11,11,11,11,11,11
Bang Rak,32,32,32,32,32,32
Bang Sue,11,11,11,11,11,11
Bangkok Noi,18,18,18,18,18,18


In [20]:
print('There are {} uniques categories.'.format(len(nearby_venues['Venue Category'].unique())))

There are 149 uniques categories.


### 9. Analyze each district

In [21]:
# one hot encoding
bk_onehot = pd.get_dummies(nearby_venues[['Venue Category']], prefix="", prefix_sep="")

In [22]:
# add district column back to dataframe
bk_onehot['District'] = nearby_venues['District'] 
bk_onehot['Lat'] = nearby_venues['Lat'] 
bk_onehot['Long'] = nearby_venues['Long'] 

# move district column to the first column
fixed_columns = [bk_onehot.columns[-1]] + list(bk_onehot.columns[:-1])
bk_onehot = bk_onehot[fixed_columns]

print (bk_onehot.shape)
bk_onehot.head()

(1000, 152)


Unnamed: 0,Long,American Restaurant,Art Gallery,Asian Restaurant,Auto Garage,BBQ Joint,Badminton Court,Bakery,Bar,Bed & Breakfast,Beer Bar,Big Box Store,Bike Rental / Bike Share,Bistro,Boat or Ferry,Bookstore,Breakfast Spot,Brewery,Buffet,Building,Burger Joint,Bus Station,Bus Stop,Cafeteria,Café,Canal,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,Comfort Food Restaurant,Convenience Store,Cosmetics Shop,Coworking Space,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dive Spot,Dog Run,Donburi Restaurant,Dumpling Restaurant,Duty-free Shop,Electronics Store,Farmers Market,Fast Food Restaurant,Flea Market,Floating Market,Food & Drink Shop,Food Court,Food Stand,Food Truck,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Gaming Cafe,Garden Center,German Restaurant,Gift Shop,Golf Driving Range,Gourmet Shop,Grocery Store,Gym,Gym / Fitness Center,Halal Restaurant,Harbor / Marina,Historic Site,History Museum,Hostel,Hotel,Hotel Bar,Hotpot Restaurant,Ice Cream Shop,Intersection,Italian Restaurant,Japanese Curry Restaurant,Japanese Restaurant,Jazz Club,Juice Bar,Karaoke Bar,Korean Restaurant,Lounge,Market,Massage Studio,Medical Center,Mediterranean Restaurant,Mexican Restaurant,Miscellaneous Shop,Mobile Phone Shop,Monument / Landmark,Multiplex,Museum,Music Venue,Neighborhood,Nightclub,Noodle House,Organic Grocery,Other Nightlife,Other Repair Shop,Park,Pedestrian Plaza,Pet Store,Pharmacy,Photography Studio,Pizza Place,Playground,Pool,Pub,Ramen Restaurant,Recording Studio,Recreation Center,Residential Building (Apartment / Condo),Resort,Restaurant,Roof Deck,Sake Bar,Salad Place,Satay Restaurant,Seafood Restaurant,Shabu-Shabu Restaurant,Shoe Store,Shopping Mall,Skating Rink,Snack Place,Soccer Field,Soccer Stadium,Som Tum Restaurant,Soup Place,Spa,Speakeasy,Sporting Goods Shop,Sports Club,Stadium,Steakhouse,Supermarket,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Theme Park,Toll Plaza,Tour Provider,Train Station,Vegetarian / Vegan Restaurant,Veterinarian,Video Store,Vietnamese Restaurant,Wings Joint,District,Lat
0,100.647778,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Bang Kapi,13.765833
1,100.647778,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Bang Kapi,13.765833
2,100.647778,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Bang Kapi,13.765833
3,100.647778,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Bang Kapi,13.765833
4,100.647778,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Bang Kapi,13.765833


### 10. Group rows by "district, latitude and longitude" and take the mean of the frequency of occurrence of each category

In [23]:
bk_grouped = bk_onehot.groupby(["District", "Lat", "Long"]).mean().reset_index()
print (bk_grouped.shape)
bk_grouped.head()

(45, 152)


Unnamed: 0,District,Lat,Long,American Restaurant,Art Gallery,Asian Restaurant,Auto Garage,BBQ Joint,Badminton Court,Bakery,Bar,Bed & Breakfast,Beer Bar,Big Box Store,Bike Rental / Bike Share,Bistro,Boat or Ferry,Bookstore,Breakfast Spot,Brewery,Buffet,Building,Burger Joint,Bus Station,Bus Stop,Cafeteria,Café,Canal,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,Comfort Food Restaurant,Convenience Store,Cosmetics Shop,Coworking Space,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dive Spot,Dog Run,Donburi Restaurant,Dumpling Restaurant,Duty-free Shop,Electronics Store,Farmers Market,Fast Food Restaurant,Flea Market,Floating Market,Food & Drink Shop,Food Court,Food Stand,Food Truck,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Gaming Cafe,Garden Center,German Restaurant,Gift Shop,Golf Driving Range,Gourmet Shop,Grocery Store,Gym,Gym / Fitness Center,Halal Restaurant,Harbor / Marina,Historic Site,History Museum,Hostel,Hotel,Hotel Bar,Hotpot Restaurant,Ice Cream Shop,Intersection,Italian Restaurant,Japanese Curry Restaurant,Japanese Restaurant,Jazz Club,Juice Bar,Karaoke Bar,Korean Restaurant,Lounge,Market,Massage Studio,Medical Center,Mediterranean Restaurant,Mexican Restaurant,Miscellaneous Shop,Mobile Phone Shop,Monument / Landmark,Multiplex,Museum,Music Venue,Neighborhood,Nightclub,Noodle House,Organic Grocery,Other Nightlife,Other Repair Shop,Park,Pedestrian Plaza,Pet Store,Pharmacy,Photography Studio,Pizza Place,Playground,Pool,Pub,Ramen Restaurant,Recording Studio,Recreation Center,Residential Building (Apartment / Condo),Resort,Restaurant,Roof Deck,Sake Bar,Salad Place,Satay Restaurant,Seafood Restaurant,Shabu-Shabu Restaurant,Shoe Store,Shopping Mall,Skating Rink,Snack Place,Soccer Field,Soccer Stadium,Som Tum Restaurant,Soup Place,Spa,Speakeasy,Sporting Goods Shop,Sports Club,Stadium,Steakhouse,Supermarket,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Theme Park,Toll Plaza,Tour Provider,Train Station,Vegetarian / Vegan Restaurant,Veterinarian,Video Store,Vietnamese Restaurant,Wings Joint
0,Bang Kapi,13.765833,100.647778,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.095238,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.047619,0.0,0.0,0.0,0.095238,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.095238,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0
1,Bang Khae,13.696111,100.409444,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.071429,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429
2,Bang Khen,13.873889,100.596389,0.0,0.0,0.2,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0
3,Bang Kho Laem,13.693333,100.5025,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.117647,0.0,0.0,0.117647,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.176471,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.058824,0.0,0.117647,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0
4,Bang Khun Thian,13.660833,100.435833,0.0,0.0,0.0,0.0,0.025641,0.025641,0.051282,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.051282,0.0,0.025641,0.025641,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.025641,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.025641,0.0,0.0,0.0,0.102564,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.051282,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.051282,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.051282,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.025641,0.0,0.0,0.102564,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### 11. Print each district along with the top 10 most common venues

In [24]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
areaColumns = ['District', 'Lat', 'Long']
freqColumns = []
for ind in np.arange(num_top_venues):
    try:
        freqColumns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        freqColumns.append('{}th Most Common Venue'.format(ind+1))
columns = areaColumns+freqColumns

In [25]:
# create a new dataframe
district_venues_sorted = pd.DataFrame(columns=columns)
district_venues_sorted['District'] = bk_grouped['District']
district_venues_sorted['Lat'] = bk_grouped['Lat']
district_venues_sorted['Long'] = bk_grouped['Long']

for ind in np.arange(bk_grouped.shape[0]):
    row_categories = bk_grouped.iloc[ind, :].iloc[3:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    district_venues_sorted.iloc[ind, 3:] = row_categories_sorted.index.values[0:num_top_venues]

    # district_venues_sorted.sort_values(freqColumns, inplace=True)
print(district_venues_sorted.shape)
district_venues_sorted.head()

(45, 13)


Unnamed: 0,District,Lat,Long,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Bang Kapi,13.765833,100.647778,Convenience Store,Noodle House,Thai Restaurant,Flea Market,Coffee Shop,Market,Steakhouse,Massage Studio,Buffet,Museum
1,Bang Khae,13.696111,100.409444,Japanese Restaurant,Wings Joint,Gourmet Shop,Shopping Mall,Noodle House,Coffee Shop,Fast Food Restaurant,Convenience Store,Supermarket,Grocery Store
2,Bang Khen,13.873889,100.596389,Asian Restaurant,Coffee Shop,Convenience Store,Bakery,Garden Center,Noodle House,Som Tum Restaurant,Vietnamese Restaurant,Duty-free Shop,Electronics Store
3,Bang Kho Laem,13.693333,100.5025,Noodle House,Thai Restaurant,Chinese Restaurant,Coffee Shop,Sushi Restaurant,Hotpot Restaurant,Shopping Mall,Vietnamese Restaurant,Seafood Restaurant,Fast Food Restaurant
4,Bang Khun Thian,13.660833,100.435833,Thai Restaurant,Japanese Restaurant,Noodle House,Pizza Place,Café,Bakery,Restaurant,Juice Bar,Fast Food Restaurant,Building


### 12. Clustering analysis

In [26]:
# set number of clusters
kclusters = 5

bk_grouped_clustering = bk_grouped.drop(['District','Lat','Long'],1)
# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(bk_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([2, 2, 2, 2, 2, 0, 4, 0, 3, 0], dtype=int32)

In [27]:
# add clustering labels
district_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# add latitude/longitude for each district with the sorted venues
bk_kmeans_df = bk_df_clean.join(district_venues_sorted.drop(["Lat", "Long"], 1).set_index("District"), on="District")
print (bk_kmeans_df.shape)
bk_kmeans_df.head()

(45, 14)


Unnamed: 0,District,Lat,Long,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Bang Kapi,13.765833,100.647778,2,Convenience Store,Noodle House,Thai Restaurant,Flea Market,Coffee Shop,Market,Steakhouse,Massage Studio,Buffet,Museum
1,Bang Khae,13.696111,100.409444,2,Japanese Restaurant,Wings Joint,Gourmet Shop,Shopping Mall,Noodle House,Coffee Shop,Fast Food Restaurant,Convenience Store,Supermarket,Grocery Store
2,Bang Khen,13.873889,100.596389,2,Asian Restaurant,Coffee Shop,Convenience Store,Bakery,Garden Center,Noodle House,Som Tum Restaurant,Vietnamese Restaurant,Duty-free Shop,Electronics Store
3,Bang Kho Laem,13.693333,100.5025,2,Noodle House,Thai Restaurant,Chinese Restaurant,Coffee Shop,Sushi Restaurant,Hotpot Restaurant,Shopping Mall,Vietnamese Restaurant,Seafood Restaurant,Fast Food Restaurant
4,Bang Khun Thian,13.660833,100.435833,2,Thai Restaurant,Japanese Restaurant,Noodle House,Pizza Place,Café,Bakery,Restaurant,Juice Bar,Fast Food Restaurant,Building


In [28]:
# create map
map_bk_clusters = folium.Map(location=[latitude, longitude], zoom_start=10)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for district, lat, long, cluster in zip(bk_kmeans_df['District'], bk_kmeans_df['Lat'], bk_kmeans_df['Long'], bk_kmeans_df['Cluster Labels']):
    label = folium.Popup(' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, long],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_bk_clusters)
       
map_bk_clusters

### 13. Examine Clusters

#### Cluster 1

In [29]:
bk_kmeans_df.loc[bk_kmeans_df['Cluster Labels'] == 0, bk_kmeans_df.columns[[0] + list(range(3, bk_kmeans_df.shape[1]))]]

Unnamed: 0,District,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,Bang Na,0,Asian Restaurant,Noodle House,Intersection,Coffee Shop,Café,Seafood Restaurant,Satay Restaurant,Thai Restaurant,Electronics Store,Farmers Market
7,Bang Rak,0,Noodle House,Hotel,Chinese Restaurant,Hotel Bar,Café,Clothing Store,Coffee Shop,Som Tum Restaurant,Massage Studio,Buffet
9,Bangkok Noi,0,Noodle House,Thai Restaurant,Park,Snack Place,Café,Chinese Restaurant,Coffee Shop,Dessert Shop,Asian Restaurant,Electronics Store
10,Bangkok Yai,0,Noodle House,Asian Restaurant,Dessert Shop,Farmers Market,Coffee Shop,Photography Studio,Train Station,BBQ Joint,Soup Place,Thai Restaurant
16,Dusit,0,Noodle House,Asian Restaurant,Dessert Shop,Convenience Store,Som Tum Restaurant,Coffee Shop,Market,Wings Joint,Floating Market,Food Truck
32,Pom Prap Sattru Phai,0,Noodle House,Café,Asian Restaurant,Thai Restaurant,Chinese Restaurant,Dim Sum Restaurant,Convenience Store,American Restaurant,Snack Place,Shopping Mall
39,Sathon,0,Noodle House,Asian Restaurant,Thai Restaurant,Chinese Restaurant,Convenience Store,Coffee Shop,Dessert Shop,Seafood Restaurant,Bakery,Bar
40,Suan Luang,0,Noodle House,Chinese Restaurant,Thai Restaurant,Coffee Shop,Asian Restaurant,Convenience Store,Som Tum Restaurant,German Restaurant,Café,Farmers Market
42,Thon Buri,0,Noodle House,Bus Stop,Fried Chicken Joint,Asian Restaurant,Pharmacy,Train Station,Deli / Bodega,Spa,Seafood Restaurant,Food Court


#### Cluster 2

In [30]:
bk_kmeans_df.loc[bk_kmeans_df['Cluster Labels'] == 1, bk_kmeans_df.columns[[0] + list(range(3, bk_kmeans_df.shape[1]))]]

Unnamed: 0,District,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
11,Bueng Kum,1,Food Truck,Miscellaneous Shop,Convenience Store,Gym / Fitness Center,Wings Joint,Fast Food Restaurant,Food Stand,Food Court,Food & Drink Shop,Floating Market


#### Cluster 3

In [31]:
bk_kmeans_df.loc[bk_kmeans_df['Cluster Labels'] == 2, bk_kmeans_df.columns[[0] + list(range(3, bk_kmeans_df.shape[1]))]]

Unnamed: 0,District,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Bang Kapi,2,Convenience Store,Noodle House,Thai Restaurant,Flea Market,Coffee Shop,Market,Steakhouse,Massage Studio,Buffet,Museum
1,Bang Khae,2,Japanese Restaurant,Wings Joint,Gourmet Shop,Shopping Mall,Noodle House,Coffee Shop,Fast Food Restaurant,Convenience Store,Supermarket,Grocery Store
2,Bang Khen,2,Asian Restaurant,Coffee Shop,Convenience Store,Bakery,Garden Center,Noodle House,Som Tum Restaurant,Vietnamese Restaurant,Duty-free Shop,Electronics Store
3,Bang Kho Laem,2,Noodle House,Thai Restaurant,Chinese Restaurant,Coffee Shop,Sushi Restaurant,Hotpot Restaurant,Shopping Mall,Vietnamese Restaurant,Seafood Restaurant,Fast Food Restaurant
4,Bang Khun Thian,2,Thai Restaurant,Japanese Restaurant,Noodle House,Pizza Place,Café,Bakery,Restaurant,Juice Bar,Fast Food Restaurant,Building
12,Chatuchak,2,Coffee Shop,Flea Market,Thai Restaurant,Beer Bar,Food Truck,Cocktail Bar,Convenience Store,Noodle House,Massage Studio,Gym
17,Huai Khwang,2,Asian Restaurant,Som Tum Restaurant,Hotel,Noodle House,Thai Restaurant,Restaurant,Chinese Restaurant,Grocery Store,Supermarket,Hotpot Restaurant
19,Khlong San,2,Hotel Bar,Clothing Store,Café,Hotel,Department Store,Dessert Shop,Coffee Shop,Noodle House,Japanese Restaurant,Park
21,Lak Si,2,Thai Restaurant,Coffee Shop,Japanese Restaurant,Hotel,Supermarket,Shopping Mall,Food Court,Café,Canal,Soccer Stadium
23,Lat Phrao,2,Noodle House,Café,Som Tum Restaurant,Asian Restaurant,Coffee Shop,Gift Shop,Hotel,Organic Grocery,Convenience Store,Sake Bar


#### Cluster 4

In [32]:
bk_kmeans_df.loc[bk_kmeans_df['Cluster Labels'] == 3, bk_kmeans_df.columns[[0] + list(range(3, bk_kmeans_df.shape[1]))]]

Unnamed: 0,District,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,Bang Sue,3,Thai Restaurant,Coffee Shop,Noodle House,Hotpot Restaurant,Badminton Court,Bar,Seafood Restaurant,Wings Joint,Food Stand,Food Court
13,Chom Thong,3,Thai Restaurant,Toll Plaza,Fast Food Restaurant,Coffee Shop,Electronics Store,Food Stand,Food Court,Food & Drink Shop,Floating Market,Flea Market
22,Lat Krabang,3,Thai Restaurant,Steakhouse,Asian Restaurant,Café,Bar,Restaurant,Boat or Ferry,Noodle House,Flea Market,Wings Joint
24,Min Buri,3,Intersection,Department Store,Coffee Shop,Thai Restaurant,Dog Run,Donburi Restaurant,Dumpling Restaurant,Duty-free Shop,Electronics Store,Dive Spot
25,Nong Chok,3,Thai Restaurant,Gym / Fitness Center,Flea Market,Shopping Mall,Other Repair Shop,Park,Dessert Shop,Convenience Store,Duty-free Shop,Electronics Store
34,Rat Burana,3,Thai Restaurant,Coffee Shop,Hotpot Restaurant,Noodle House,Chinese Restaurant,Bistro,Food Stand,Food Court,Food & Drink Shop,Floating Market
36,Sai Mai,3,Thai Restaurant,Deli / Bodega,Noodle House,Bar,Farmers Market,Food Stand,Food Court,Food & Drink Shop,Floating Market,Flea Market


#### Cluster 5

In [33]:
bk_kmeans_df.loc[bk_kmeans_df['Cluster Labels'] == 4, bk_kmeans_df.columns[[0] + list(range(3, bk_kmeans_df.shape[1]))]]

Unnamed: 0,District,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,Bang Phlat,4,Convenience Store,Dog Run,Cocktail Bar,Café,Restaurant,Fast Food Restaurant,Bus Station,Massage Studio,Coffee Shop,Food Stand
14,Din Daeng,4,Convenience Store,Thai Restaurant,Hotel,Recreation Center,Sports Club,Park,Stadium,Farmers Market,Food Court,Food & Drink Shop
15,Don Mueang,4,Restaurant,Spa,Thai Restaurant,Convenience Store,Buffet,Gym,Food Truck,Hotel,Dumpling Restaurant,Donburi Restaurant
18,Khlong Sam Wa,4,Pub,Japanese Restaurant,Coffee Shop,Convenience Store,Restaurant,Thai Restaurant,Dim Sum Restaurant,Dog Run,Donburi Restaurant,Dive Spot
20,Khlong Toei,4,Recording Studio,Bar,Canal,Food Court,Bistro,Thai Restaurant,Karaoke Bar,Convenience Store,Farmers Market,Electronics Store
33,Prawet,4,Convenience Store,Comfort Food Restaurant,Food Stand,Halal Restaurant,Farmers Market,Food Court,Food & Drink Shop,Floating Market,Flea Market,Fast Food Restaurant
38,Saphan Sung,4,Thai Restaurant,Japanese Restaurant,Restaurant,Convenience Store,Stadium,Electronics Store,Food Court,Food & Drink Shop,Floating Market,Flea Market
41,Taling Chan,4,Coffee Shop,Soccer Field,Floating Market,Convenience Store,Farmers Market,Food Stand,Food Court,Food & Drink Shop,Flea Market,Fast Food Restaurant


### Observation

Most of the districts fall into cluster 3 followed by cluster 1, 4 and 5. Just 1 district falls into cluster 2. Therefore, we can believe cluster 3 is the most popular cluster followed by cluster 1, 4 and 5. The least popular cluster is cluster 2. It is a good opportunity to run the business in this clustering sequence: 3, (1, 4, 5), 2.

As most of the districts fall into cluster 3, let's take a closer look at cluster 3 first (see Cluster 3 result). We can see it is full of restaurants with different style cuisine. It should be very competitive when we run a Japanese restaurant here.

Then move on to cluster 1, it is found that it is full of noodle house, Asian restaurant, Chinese restaurant, Thai restaurant and cafe (see Cluster 1 result). People right there should love eating and Asian food. However, just a few Japanese restaurants are opened there. Therefore, cluster 1 is a good choice to open a Japanese restaurant.

Venues in cluster 4 and cluster 5 are too diverse with different categories. It poses more risk than cluster 1 as we are not sure if people right there love eating (see Cluster 4 & 5 results).

Cluster 2 just has one district so it should not be a good start due to low popularity (see Cluster 2 result).

To sum up, it is a good choice to open a Japanese restaurant in districts in cluster 1.