<font size="4">Creating dataframe of Singaporean districts and their coordinates.</font>

Let us first create a dataframe that contains the districts of Singapore and their coordinates by scraping the data from Wikipedia.

In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import geocoder # import geocoder
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import folium # map rendering library
import json # library to handle JSON files
import numpy as np
# import k-means from clustering stage
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors

url = "https://en.wikipedia.org/wiki/Planning_Areas_of_Singapore" #Url of Wikipedia page with Toronto neighborhoods and postal codes
data  = requests.get(url).text
soup = BeautifulSoup(data,"html5lib")
tables = soup.find_all('table') #Find tables in the html
planningArea_data = pd.DataFrame(columns=["Planning Area","Latitude","Longitude"])
for row in tables[2].tbody.find_all("tr"):    
    col =row.td
    if col != None:
        planningArea = col.text.strip()
        geolocator = Nominatim(user_agent="sg_explorer")
        address = '{}, SG'.format(planningArea)
        location = geolocator.geocode(address)
        latitude = location.latitude
        longitude = location.longitude
        planningArea_data = planningArea_data.append({"Planning Area": planningArea,"Latitude": latitude,"Longitude" : longitude}, ignore_index=True)
        



In [2]:
planningArea_data.shape

(55, 3)

Create a map of Singapore and show all the districts.

In [3]:
address = 'Singapore'

#Get the latitude and longitude of Singapore
geolocator = Nominatim(user_agent="sg_explorer")
location = geolocator.geocode(address)
latitude = location.latitude 
longitude = location.longitude

# create map of New York using latitude and longitude values
map_singapore = folium.Map(location=[latitude, longitude], zoom_start=10)

#add markers to map
for lat, lng, neighborhood in zip(planningArea_data["Latitude"], planningArea_data["Longitude"], planningArea_data["Planning Area"]):
    label = neighborhood
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_singapore)  
    
map_singapore

Define Foursquare credentials

In [4]:
CLIENT_ID = 'IRKFWJXIIWOMSCV1EBIGSM4C1ODUR1V1WB4XXDKUARYY4WXH' # your Foursquare ID
CLIENT_SECRET = '0B4YEAJQNMY3Y02RUNAWKD0VEX2CD4Q25UH4WPALOSHHNDUU' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

<font size="4">Exploring the first district in the planningArea_data Dataframe</font>

In [5]:
print("The first district in the dataframe is {}.".format(planningArea_data.loc[0, "Planning Area"]))

The first district in the dataframe is Ang Mo Kio.


In [6]:
neighborhood_latitude = planningArea_data.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = planningArea_data.loc[0, 'Longitude'] # neighborhood longitude value

latitude = neighborhood_latitude
longitude = neighborhood_longitude
radius = 500

url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, radius, LIMIT)
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '61352c0f15da8916a1347c9d'},
 'response': {'headerLocation': 'Ang Mo Kio',
  'headerFullLocation': 'Ang Mo Kio, Singapore',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 42,
  'suggestedBounds': {'ne': {'lat': 1.3745803045000045,
    'lng': 103.85401568693963},
   'sw': {'lat': 1.3655802954999954, 'lng': 103.84502991306037}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4b15f661f964a52012b623e3',
       'name': 'FairPrice Xtra',
       'location': {'address': '#B2-26 AMK Hub',
        'crossStreet': '53 Ang Mo Kio Ave 3',
        'lat': 1.3692792884081397,
        'lng': 103.84888576818767,
        'labeledLatLngs': [{'label': 'display',
          'lat': 1.3692792884081397,
          'lng': 103.84888576818767}],
 

In [7]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']
    
venues = results['response']['groups'][0]['items']
    
nearby_venues = pd.json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,FairPrice Xtra,Supermarket,1.369279,103.848886
1,Old Chang Kee,Snack Place,1.369094,103.848389
2,MOS Burger,Burger Joint,1.36917,103.847831
3,A&W,Fast Food Restaurant,1.369541,103.849043
4,Subway,Sandwich Place,1.369136,103.847612


<font size="4">Exploring the districts of Singapore</font>

First, define a function that can be used to search for venues in each district of Singapore.

In [8]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [9]:
singapore_venues=getNearbyVenues(list(planningArea_data["Planning Area"]),list(planningArea_data['Latitude']),list(planningArea_data['Longitude']))
print(singapore_venues.shape)
singapore_venues.head()

Ang Mo Kio
Bedok
Bishan
Boon Lay
Bukit Batok
Bukit Merah
Bukit Panjang
Bukit Timah
Central Water Catchment
Changi
Changi Bay
Choa Chu Kang
Clementi
Downtown Core
Geylang
Hougang
Jurong East
Jurong West
Kallang
Lim Chu Kang
Mandai
Marina East
Marina South
Marine Parade
Museum
Newton
North-Eastern Islands
Novena
Orchard
Outram
Pasir Ris
Paya Lebar
Pioneer
Punggol
Queenstown
River Valley
Rochor
Seletar
Sembawang
Sengkang
Serangoon
Simpang
Singapore River
Southern Islands
Straits View
Sungei Kadut
Tampines
Tanglin
Tengah
Toa Payoh
Tuas
Western Islands
Western Water Catchment
Woodlands
Yishun
(1676, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Ang Mo Kio,1.37008,103.849523,FairPrice Xtra,1.369279,103.848886,Supermarket
1,Ang Mo Kio,1.37008,103.849523,Old Chang Kee,1.369094,103.848389,Snack Place
2,Ang Mo Kio,1.37008,103.849523,MOS Burger,1.36917,103.847831,Burger Joint
3,Ang Mo Kio,1.37008,103.849523,A&W,1.369541,103.849043,Fast Food Restaurant
4,Ang Mo Kio,1.37008,103.849523,Subway,1.369136,103.847612,Sandwich Place


This dataframe contains over 1600 venues. Let us check the number of venues found in each district.

In [10]:
singapore_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Ang Mo Kio,42,42,42,42,42,42
Bedok,60,60,60,60,60,60
Bishan,44,44,44,44,44,44
Boon Lay,70,70,70,70,70,70
Bukit Batok,22,22,22,22,22,22
Bukit Merah,24,24,24,24,24,24
Bukit Panjang,48,48,48,48,48,48
Bukit Timah,11,11,11,11,11,11
Central Water Catchment,3,3,3,3,3,3
Changi,5,5,5,5,5,5


In [11]:
singapore_venues.value_counts(subset=['Venue Category'])

Venue Category      
Chinese Restaurant      88
Coffee Shop             83
Japanese Restaurant     72
Food Court              71
Café                    60
                        ..
River                    1
Gun Range                1
Gourmet Shop             1
Kitchen Supply Store     1
Zoo Exhibit              1
Length: 219, dtype: int64

The most common venue categories are all 

In [12]:
singapore_venues.nunique(axis=0)

Neighborhood                51
Neighborhood Latitude       51
Neighborhood Longitude      51
Venue                     1260
Venue Latitude            1610
Venue Longitude           1609
Venue Category             219
dtype: int64

Only 51 out of 55 neighborhoods have venues.

<font size="4">Types of venues in each neighborhood.</font>

Next, we would like to see what kinds of venues are most common in each Singapore neighborhood. First, we perform one-hot encoding for each venue.

In [13]:
# one hot encoding
singapore_onehot = pd.get_dummies(singapore_venues[['Venue Category']], prefix="", prefix_sep="")
singapore_onehot.drop(['Neighborhood'], axis=1,inplace = True)
singapore_onehot.insert(loc=0, column='Neighborhood', value=singapore_venues['Neighborhood'] )


Next, group the venues by their neighborhoods and calculate the mean frequency for each venue category.

In [14]:
singapore_grouped = singapore_onehot.groupby('Neighborhood').mean().reset_index()
singapore_grouped

Unnamed: 0,Neighborhood,ATM,Accessories Store,Airport,American Restaurant,Arcade,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Waterfront,Whisky Bar,Wine Shop,Wings Joint,Yoga Studio,Zoo Exhibit
0,Ang Mo Kio,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Bedok,0.0,0.0,0.0,0.016667,0.0,0.0,0.0,0.0,0.05,...,0.0,0.016667,0.0,0.0,0.0,0.0,0.0,0.016667,0.0,0.0
2,Bishan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Boon Lay,0.0,0.0,0.0,0.014286,0.0,0.0,0.0,0.0,0.1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014286,0.0,0.0
4,Bukit Batok,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Bukit Merah,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,...,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Bukit Panjang,0.0,0.0,0.0,0.020833,0.0,0.0,0.0,0.0,0.083333,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Bukit Timah,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Central Water Catchment,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Changi,0.0,0.0,0.4,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


We can look at the top five most common venues for each neighborhood.

In [15]:
num_top_venues = 5

for hood in singapore_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = singapore_grouped[singapore_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Ang Mo Kio----
                 venue  freq
0          Coffee Shop  0.10
1         Dessert Shop  0.07
2  Japanese Restaurant  0.07
3           Food Court  0.07
4      Bubble Tea Shop  0.07


----Bedok----
                venue  freq
0         Coffee Shop  0.07
1      Sandwich Place  0.05
2          Food Court  0.05
3  Chinese Restaurant  0.05
4    Asian Restaurant  0.05


----Bishan----
              venue  freq
0       Coffee Shop  0.09
1        Food Court  0.09
2    Ice Cream Shop  0.07
3   Bubble Tea Shop  0.07
4  Asian Restaurant  0.05


----Boon Lay----
                  venue  freq
0   Japanese Restaurant  0.11
1      Asian Restaurant  0.10
2  Fast Food Restaurant  0.07
3    Chinese Restaurant  0.07
4          Dessert Shop  0.06


----Bukit Batok----
                venue  freq
0         Coffee Shop  0.18
1  Chinese Restaurant  0.14
2          Food Court  0.09
3         Bus Station  0.09
4    Malay Restaurant  0.05


----Bukit Merah----
                  venue  freq
0    Chin

               venue  freq
0        Snack Place   1.0
1                ATM   0.0
2  Outdoor Sculpture   0.0
3      Movie Theater   0.0
4          Multiplex   0.0


----Toa Payoh----
                venue  freq
0        Noodle House  0.15
1  Chinese Restaurant  0.12
2          Food Court  0.10
3         Coffee Shop  0.08
4     Thai Restaurant  0.06


----Tuas----
          venue  freq
0      Bus Stop   0.5
1   Coffee Shop   0.5
2           ATM   0.0
3  Outlet Store   0.0
4     Multiplex   0.0


----Western Water Catchment----
               venue  freq
0          Gun Range   1.0
1                ATM   0.0
2  Outdoor Sculpture   0.0
3      Movie Theater   0.0
4          Multiplex   0.0


----Woodlands----
                  venue  freq
0   Japanese Restaurant  0.08
1                  Café  0.06
2           Coffee Shop  0.06
3  Fast Food Restaurant  0.04
4           Supermarket  0.04


----Yishun----
                 venue  freq
0           Food Court  0.11
1   Chinese Restaurant  0.07
2  

This information can also be put into a dataframe showing the top ten most common venues for each Singaporean neighborhood.

In [16]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = singapore_grouped['Neighborhood']

for ind in np.arange(singapore_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(singapore_grouped.iloc[ind, :], num_top_venues)

In [17]:
neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Ang Mo Kio,Coffee Shop,Dessert Shop,Japanese Restaurant,Food Court,Bubble Tea Shop,Supermarket,Noodle House,Pharmacy,Snack Place,Malay Restaurant
1,Bedok,Coffee Shop,Noodle House,Asian Restaurant,Food Court,Japanese Restaurant,Sandwich Place,Chinese Restaurant,Burger Joint,Thrift / Vintage Store,Supermarket
2,Bishan,Coffee Shop,Food Court,Ice Cream Shop,Bubble Tea Shop,Supermarket,Chinese Restaurant,Japanese Restaurant,Asian Restaurant,Café,Cosmetics Shop
3,Boon Lay,Japanese Restaurant,Asian Restaurant,Fast Food Restaurant,Chinese Restaurant,Dessert Shop,Café,Coffee Shop,Indian Restaurant,Halal Restaurant,Bubble Tea Shop
4,Bukit Batok,Coffee Shop,Chinese Restaurant,Food Court,Bus Station,Shopping Mall,Frozen Yogurt Shop,Bowling Alley,Multiplex,Malay Restaurant,Café
5,Bukit Merah,Chinese Restaurant,Food Court,Café,Gym,Bus Stop,Cable Car,Mountain,Scenic Lookout,Bowling Alley,Sporting Goods Shop
6,Bukit Panjang,Asian Restaurant,Sushi Restaurant,Fast Food Restaurant,Café,Pharmacy,Supermarket,Restaurant,Bubble Tea Shop,Shopping Mall,Coffee Shop
7,Bukit Timah,Trail,Scenic Lookout,Hill,Rest Area,Zoo Exhibit,Food Service,Food Court,Food,Flower Shop,Flea Market
8,Central Water Catchment,Reservoir,Business Service,Zoo Exhibit,Farm,Food Stand,Food Service,Food Court,Food,Flower Shop,Flea Market
9,Changi,Airport,Hotel,Spa,Rest Area,Farmers Market,Food Truck,Food Stand,Food Service,Food Court,Food


Next, we would like to rank the neighborhoods in terms of number of gyms.

In [18]:
singapore_venues[(singapore_venues["Venue Category"] == 'Gym') | (singapore_venues["Venue Category"] == 'Gym / Fitness Center')].value_counts(subset=['Neighborhood']).to_frame().reset_index()             

Unnamed: 0,Neighborhood,0
0,Tampines,4
1,Singapore River,3
2,Downtown Core,3
3,Ang Mo Kio,2
4,Pioneer,2
5,Bukit Panjang,2
6,Clementi,2
7,Hougang,1
8,Bishan,1
9,Boon Lay,1


Let us look at the top six Singaporean neighborhoods with the most gyms.

In [19]:
neighborhoodsMostGyms = singapore_venues[(singapore_venues["Venue Category"] == 'Gym') | (singapore_venues["Venue Category"] == 'Gym / Fitness Center')].value_counts(subset=['Neighborhood']).to_frame().reset_index()['Neighborhood'][0:6]          
for index,value in neighborhoodsMostGyms.iteritems():
    print(value)

Tampines
Singapore River
Downtown Core
Ang Mo Kio
Pioneer
Bukit Panjang


Let us plot these neighborhoods on a map.

In [20]:
address = 'Singapore'

#Get the latitude and longitude of Singapore
geolocator = Nominatim(user_agent="sg_explorer")
location = geolocator.geocode(address)
latitude = location.latitude 
longitude = location.longitude

# create map of New York using latitude and longitude values
map_singapore = folium.Map(location=[latitude, longitude], zoom_start=10)

#add markers to map
neighborhoodsMostGyms = singapore_venues[(singapore_venues["Venue Category"] == 'Gym') | (singapore_venues["Venue Category"] == 'Gym / Fitness Center')].value_counts(subset=['Neighborhood']).to_frame().reset_index()['Neighborhood'][0:6]          
for index,value in neighborhoodsMostGyms.iteritems():
    neighborhood = value
    lat = planningArea_data.loc[planningArea_data['Planning Area'] == value, 'Latitude'].iloc[0]
    lng = planningArea_data.loc[planningArea_data['Planning Area'] == value, 'Longitude'].iloc[0]    
    label = neighborhood
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_singapore)  
    
map_singapore

Next, we can look at which neighborhoods have the most malls.

In [21]:
singapore_venues[singapore_venues["Venue Category"] == 'Shopping Mall']

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
24,Ang Mo Kio,1.37008,103.849523,AMK Hub,1.369247,103.848525,Shopping Mall
66,Bedok,1.323976,103.930216,Bedok Mall,1.324984,103.929361,Shopping Mall
118,Bishan,1.350986,103.848255,Junction 8 Shopping Centre,1.350534,103.848732,Shopping Mall
153,Boon Lay,1.33855,103.705812,Jurong Point,1.339121,103.706352,Shopping Mall
230,Bukit Batok,1.349057,103.749591,West Mall,1.34991,103.749199,Shopping Mall
268,Bukit Panjang,1.377917,103.763095,Hillion Mall,1.378074,103.76267,Shopping Mall
275,Bukit Panjang,1.377917,103.763095,Bukit Panjang Plaza,1.380137,103.764153,Shopping Mall
402,Clementi,1.3151,103.765231,321 Clementi,1.311964,103.765072,Shopping Mall
407,Clementi,1.3151,103.765231,The Clementi Mall,1.315036,103.764909,Shopping Mall
540,Hougang,1.370801,103.892544,Hougang Mall,1.372702,103.893793,Shopping Mall


In [22]:
singapore_venues[singapore_venues["Venue Category"] == 'Shopping Mall'].value_counts(subset=['Neighborhood']).to_frame().reset_index()

Unnamed: 0,Neighborhood,0
0,Paya Lebar,6
1,Orchard,4
2,Tampines,3
3,Jurong East,3
4,Sembawang,2
5,Bukit Panjang,2
6,Woodlands,2
7,Clementi,2
8,Yishun,1
9,Hougang,1


In [23]:
neighborhoodsMostMalls = singapore_venues[singapore_venues["Venue Category"] == 'Shopping Mall'].value_counts(subset=['Neighborhood']).to_frame().reset_index()['Neighborhood'][0:7]                      
neighborhoodsMostMalls

0       Paya Lebar
1          Orchard
2         Tampines
3      Jurong East
4        Sembawang
5    Bukit Panjang
6        Woodlands
Name: Neighborhood, dtype: object

In [24]:
address = 'Singapore'

#Get the latitude and longitude of Singapore
geolocator = Nominatim(user_agent="sg_explorer")
location = geolocator.geocode(address)
latitude = location.latitude 
longitude = location.longitude

# create map of New York using latitude and longitude values
map_singapore = folium.Map(location=[latitude, longitude], zoom_start=10)

#add markers to map
for index,value in neighborhoodsMostMalls.iteritems():
    neighborhood = value
    lat = planningArea_data.loc[planningArea_data['Planning Area'] == value, 'Latitude'].iloc[0]
    lng = planningArea_data.loc[planningArea_data['Planning Area'] == value, 'Longitude'].iloc[0]    
    label = neighborhood
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_singapore)  
    
map_singapore

Lastly, let us see which neighborhoods have the most coffee shops.

In [25]:
singapore_venues[singapore_venues["Venue Category"] == 'Coffee Shop'].value_counts(subset=['Neighborhood']).to_frame().reset_index()

Unnamed: 0,Neighborhood,0
0,Novena,6
1,Jurong East,6
2,Clementi,6
3,Bedok,4
4,Bishan,4
5,Bukit Batok,4
6,Ang Mo Kio,4
7,Toa Payoh,4
8,Tampines,4
9,Jurong West,3


The top eight neighborhoods with the most coffee shops are listed below:

In [26]:
neighborhoodsMostCoffeeShops = singapore_venues[singapore_venues["Venue Category"] == 'Coffee Shop'].value_counts(subset=['Neighborhood']).to_frame().reset_index()['Neighborhood'][0:8]        
neighborhoodsMostCoffeeShops

0         Novena
1    Jurong East
2       Clementi
3          Bedok
4         Bishan
5    Bukit Batok
6     Ang Mo Kio
7      Toa Payoh
Name: Neighborhood, dtype: object

In [27]:
address = 'Singapore'

#Get the latitude and longitude of Singapore
geolocator = Nominatim(user_agent="sg_explorer")
location = geolocator.geocode(address)
latitude = location.latitude 
longitude = location.longitude

# create map of New York using latitude and longitude values
map_singapore = folium.Map(location=[latitude, longitude], zoom_start=10)

#add markers to map
for index,value in neighborhoodsMostCoffeeShops.iteritems():
    neighborhood = value
    lat = planningArea_data.loc[planningArea_data['Planning Area'] == value, 'Latitude'].iloc[0]
    lng = planningArea_data.loc[planningArea_data['Planning Area'] == value, 'Longitude'].iloc[0]    
    label = neighborhood
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_singapore)  
    
map_singapore

<font size="4">Clustering Neighborhoods</font>

Let us perform k-means clustering on the Singaporean neighborhoods.

In [28]:
# set number of clusters
kclusters = 5

singapore_grouped_clustering = singapore_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(singapore_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)

To understand these clusters, let us create a dataframe that shows the cluster labels for these neighborhoods as well as the ten most common venues in these neighborhoods.

In [29]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

singapore_merged = planningArea_data.rename(columns={"Planning Area": 'Neighborhood'})

# merge manhattan_grouped with manhattan_data to add latitude/longitude for each neighborhood
singapore_merged = singapore_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

singapore_merged # check the last columns!

Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Ang Mo Kio,1.37008,103.849523,0.0,Coffee Shop,Dessert Shop,Japanese Restaurant,Food Court,Bubble Tea Shop,Supermarket,Noodle House,Pharmacy,Snack Place,Malay Restaurant
1,Bedok,1.323976,103.930216,0.0,Coffee Shop,Noodle House,Asian Restaurant,Food Court,Japanese Restaurant,Sandwich Place,Chinese Restaurant,Burger Joint,Thrift / Vintage Store,Supermarket
2,Bishan,1.350986,103.848255,0.0,Coffee Shop,Food Court,Ice Cream Shop,Bubble Tea Shop,Supermarket,Chinese Restaurant,Japanese Restaurant,Asian Restaurant,Café,Cosmetics Shop
3,Boon Lay,1.33855,103.705812,0.0,Japanese Restaurant,Asian Restaurant,Fast Food Restaurant,Chinese Restaurant,Dessert Shop,Café,Coffee Shop,Indian Restaurant,Halal Restaurant,Bubble Tea Shop
4,Bukit Batok,1.349057,103.749591,0.0,Coffee Shop,Chinese Restaurant,Food Court,Bus Station,Shopping Mall,Frozen Yogurt Shop,Bowling Alley,Multiplex,Malay Restaurant,Café
5,Bukit Merah,1.274864,103.820276,0.0,Chinese Restaurant,Food Court,Café,Gym,Bus Stop,Cable Car,Mountain,Scenic Lookout,Bowling Alley,Sporting Goods Shop
6,Bukit Panjang,1.377917,103.763095,0.0,Asian Restaurant,Sushi Restaurant,Fast Food Restaurant,Café,Pharmacy,Supermarket,Restaurant,Bubble Tea Shop,Shopping Mall,Coffee Shop
7,Bukit Timah,1.35469,103.776372,0.0,Trail,Scenic Lookout,Hill,Rest Area,Zoo Exhibit,Food Service,Food Court,Food,Flower Shop,Flea Market
8,Central Water Catchment,1.370059,103.803448,0.0,Reservoir,Business Service,Zoo Exhibit,Farm,Food Stand,Food Service,Food Court,Food,Flower Shop,Flea Market
9,Changi,1.35108,103.990064,0.0,Airport,Hotel,Spa,Rest Area,Farmers Market,Food Truck,Food Stand,Food Service,Food Court,Food


Most neighborhoods have been dumped into cluster 0 while the obvious outliers like Mandai and Western Water Catchment have been assigned their own cluster. To have any meaningful clustering, we need to remove these neighborhoods from out data.

<font size="4">Clustering Neighborhoods with 4 Clusters</font>

In [None]:
neighborhoods_to_remove = ["Mandai", "North-Eastern Islands", "Simpang", "Southern Islands", "Straits View", "Tengah","Western Islands","Western Water Catchment","Bukit Timah","Central Water Catchment","Marina East","Tuas","Changi","Changi Bay","Kallang","Lim Chu Kang","Marina South","Rochor","Seletar","Sungei Kadut","Jurong West"]                               

In [None]:
singapore_data = planningArea_data.rename(columns={"Planning Area": 'Neighborhood'})
index_names = singapore_data[ singapore_data['Neighborhood'].isin(neighborhoods_to_remove)].index
singapore_data.drop(index_names, inplace = True)
singapore_data.reset_index(inplace = True,drop=True)
singapore_data.shape

There are now only 47 neighborhoods instead of the original 55. We repeat this with the other relevant dataframes

In [None]:
index_names = singapore_grouped[ singapore_grouped['Neighborhood'].isin(neighborhoods_to_remove)].index
singapore_grouped.drop(index_names, inplace = True)
singapore_grouped.reset_index(inplace = True,drop=True)
singapore_grouped.shape

In [None]:
neighborhoods_venues_sorted.drop(['Cluster Labels'], axis=1,inplace = True)
index_names = neighborhoods_venues_sorted[ neighborhoods_venues_sorted['Neighborhood'].isin(neighborhoods_to_remove)].index
neighborhoods_venues_sorted.drop(index_names, inplace = True)
neighborhoods_venues_sorted.reset_index(inplace = True,drop=True)
neighborhoods_venues_sorted.shape

Having removed the outlier neighborhoods, we repeat the k-means clustering again.

In [None]:
# set number of clusters
kclusters = 4

singapore_grouped_clustering = singapore_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(singapore_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

In [None]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

singapore_merged = singapore_data

# merge manhattan_grouped with manhattan_data to add latitude/longitude for each neighborhood
singapore_merged = singapore_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

singapore_merged # check the last columns!

In [None]:
singapore_merged.value_counts(subset=['Cluster Labels'])

<font size="4">Clustering Neighborhoods with 5 Clusters</font>

In [30]:
neighborhoods_to_remove = ["Mandai", "North-Eastern Islands", "Simpang", "Southern Islands", "Straits View", "Tengah","Western Islands","Western Water Catchment","Bukit Timah","Central Water Catchment","Marina East","Tuas","Changi","Changi Bay","Kallang","Lim Chu Kang","Marina South","Rochor","Seletar","Sungei Kadut","Jurong West"]  

In [31]:
singapore_data = planningArea_data.rename(columns={"Planning Area": 'Neighborhood'})
index_names = singapore_data[ singapore_data['Neighborhood'].isin(neighborhoods_to_remove)].index
singapore_data.drop(index_names, inplace = True)
singapore_data.reset_index(inplace = True,drop=True)
singapore_data.shape

(34, 3)

In [32]:
index_names = singapore_grouped[ singapore_grouped['Neighborhood'].isin(neighborhoods_to_remove)].index
singapore_grouped.drop(index_names, inplace = True)
singapore_grouped.reset_index(inplace = True,drop=True)
singapore_grouped.shape

(34, 219)

In [33]:
neighborhoods_venues_sorted.drop(['Cluster Labels'], axis=1,inplace = True)
index_names = neighborhoods_venues_sorted[ neighborhoods_venues_sorted['Neighborhood'].isin(neighborhoods_to_remove)].index
neighborhoods_venues_sorted.drop(index_names, inplace = True)
neighborhoods_venues_sorted.reset_index(inplace = True,drop=True)
neighborhoods_venues_sorted.shape

(34, 11)

In [34]:
# set number of clusters
kclusters = 5

singapore_grouped_clustering = singapore_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(singapore_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 1, 1, 0, 4, 1, 1, 4, 1, 1], dtype=int32)

In [35]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

singapore_merged = singapore_data

# merge manhattan_grouped with manhattan_data to add latitude/longitude for each neighborhood
singapore_merged = singapore_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

singapore_merged # check the last columns!

Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Ang Mo Kio,1.37008,103.849523,1,Coffee Shop,Dessert Shop,Japanese Restaurant,Food Court,Bubble Tea Shop,Supermarket,Noodle House,Pharmacy,Snack Place,Malay Restaurant
1,Bedok,1.323976,103.930216,1,Coffee Shop,Noodle House,Asian Restaurant,Food Court,Japanese Restaurant,Sandwich Place,Chinese Restaurant,Burger Joint,Thrift / Vintage Store,Supermarket
2,Bishan,1.350986,103.848255,1,Coffee Shop,Food Court,Ice Cream Shop,Bubble Tea Shop,Supermarket,Chinese Restaurant,Japanese Restaurant,Asian Restaurant,Café,Cosmetics Shop
3,Boon Lay,1.33855,103.705812,0,Japanese Restaurant,Asian Restaurant,Fast Food Restaurant,Chinese Restaurant,Dessert Shop,Café,Coffee Shop,Indian Restaurant,Halal Restaurant,Bubble Tea Shop
4,Bukit Batok,1.349057,103.749591,4,Coffee Shop,Chinese Restaurant,Food Court,Bus Station,Shopping Mall,Frozen Yogurt Shop,Bowling Alley,Multiplex,Malay Restaurant,Café
5,Bukit Merah,1.274864,103.820276,1,Chinese Restaurant,Food Court,Café,Gym,Bus Stop,Cable Car,Mountain,Scenic Lookout,Bowling Alley,Sporting Goods Shop
6,Bukit Panjang,1.377917,103.763095,1,Asian Restaurant,Sushi Restaurant,Fast Food Restaurant,Café,Pharmacy,Supermarket,Restaurant,Bubble Tea Shop,Shopping Mall,Coffee Shop
7,Choa Chu Kang,1.384749,103.744534,4,Food Court,Fast Food Restaurant,Bus Station,Asian Restaurant,Coffee Shop,Bus Stop,Bakery,Bowling Alley,Supermarket,Sandwich Place
8,Clementi,1.3151,103.765231,1,Coffee Shop,Food Court,Dessert Shop,Fast Food Restaurant,Chinese Restaurant,Asian Restaurant,Bakery,Chinese Breakfast Place,Fried Chicken Joint,Noodle House
9,Downtown Core,1.286242,103.853776,1,Concert Hall,Gym / Fitness Center,Hotel,Salad Place,Cocktail Bar,French Restaurant,Coffee Shop,Performing Arts Venue,Hotel Bar,Lounge


In [36]:
singapore_merged.value_counts(subset=['Cluster Labels'])

Cluster Labels
1                 13
0                 10
4                  6
2                  4
3                  1
dtype: int64

In [37]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(singapore_merged['Latitude'], singapore_merged['Longitude'], singapore_merged['Neighborhood'], singapore_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

Cluster 1

In [38]:
singapore_merged.loc[singapore_merged['Cluster Labels'] == 0, singapore_merged.columns[[1] + list(range(5, singapore_merged.shape[1]))]]

Unnamed: 0,Latitude,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,1.33855,Asian Restaurant,Fast Food Restaurant,Chinese Restaurant,Dessert Shop,Café,Coffee Shop,Indian Restaurant,Halal Restaurant,Bubble Tea Shop
13,1.302689,Multiplex,Massage Studio,Japanese Restaurant,Chinese Restaurant,Supermarket,Indian Restaurant,Lounge,Salad Place,Clothing Store
14,1.295656,Dessert Shop,Japanese Restaurant,Cosmetics Shop,Bubble Tea Shop,Concert Hall,Sushi Restaurant,Restaurant,Whisky Bar,French Restaurant
17,1.303427,Japanese Restaurant,Bakery,Chinese Restaurant,Hotel,Sushi Restaurant,Cosmetics Shop,Shopping Mall,Indonesian Restaurant,Department Store
22,1.405197,Chinese Restaurant,Bubble Tea Shop,Bakery,Park,Snack Place,Supermarket,Fast Food Restaurant,Breakfast Spot,Light Rail Station
24,1.297683,Café,Shoe Store,Hotel,Clothing Store,Fast Food Restaurant,Salad Place,Movie Theater,Bus Stop,Men's Store
28,1.289178,Hotel,Nightclub,Thai Restaurant,Food Court,Italian Restaurant,Bar,Café,Vegetarian / Vegan Restaurant,Noodle House
29,1.354653,Bakery,Coffee Shop,Bubble Tea Shop,Shopping Mall,Thai Restaurant,Gym,Chinese Restaurant,Japanese Restaurant,Supermarket
30,1.306044,Garden,Thai Restaurant,French Restaurant,Bakery,Bar,Gastropub,Lake,Lounge,Brewery
32,1.436897,Coffee Shop,Café,Frozen Yogurt Shop,Shopping Mall,Supermarket,Electronics Store,Asian Restaurant,Clothing Store,Chinese Restaurant


Cluster 2

In [39]:
singapore_merged.loc[singapore_merged['Cluster Labels'] == 1, singapore_merged.columns[[1] + list(range(5, singapore_merged.shape[1]))]]

Unnamed: 0,Latitude,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,1.37008,Dessert Shop,Japanese Restaurant,Food Court,Bubble Tea Shop,Supermarket,Noodle House,Pharmacy,Snack Place,Malay Restaurant
1,1.323976,Noodle House,Asian Restaurant,Food Court,Japanese Restaurant,Sandwich Place,Chinese Restaurant,Burger Joint,Thrift / Vintage Store,Supermarket
2,1.350986,Food Court,Ice Cream Shop,Bubble Tea Shop,Supermarket,Chinese Restaurant,Japanese Restaurant,Asian Restaurant,Café,Cosmetics Shop
5,1.274864,Food Court,Café,Gym,Bus Stop,Cable Car,Mountain,Scenic Lookout,Bowling Alley,Sporting Goods Shop
6,1.377917,Sushi Restaurant,Fast Food Restaurant,Café,Pharmacy,Supermarket,Restaurant,Bubble Tea Shop,Shopping Mall,Coffee Shop
8,1.3151,Food Court,Dessert Shop,Fast Food Restaurant,Chinese Restaurant,Asian Restaurant,Bakery,Chinese Breakfast Place,Fried Chicken Joint,Noodle House
9,1.286242,Gym / Fitness Center,Hotel,Salad Place,Cocktail Bar,French Restaurant,Coffee Shop,Performing Arts Venue,Hotel Bar,Lounge
12,1.333108,Food Court,Café,Chinese Restaurant,Japanese Restaurant,Shopping Mall,Sandwich Place,Bubble Tea Shop,Clothing Store,Multiplex
16,1.320526,Café,Hotel,Asian Restaurant,Hainan Restaurant,Ramen Restaurant,Italian Restaurant,Hotpot Restaurant,Bakery,Chinese Restaurant
19,1.373031,Diner,Fast Food Restaurant,Italian Restaurant,Sandwich Place,Salon / Barbershop,Snack Place,Shopping Mall,Bus Line,Seafood Restaurant


Cluster 3

In [40]:
singapore_merged.loc[singapore_merged['Cluster Labels'] == 2, singapore_merged.columns[[1] + list(range(5, singapore_merged.shape[1]))]]

Unnamed: 0,Latitude,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
10,1.318186,Noodle House,Food Court,Vegetarian / Vegan Restaurant,Dim Sum Restaurant,Grocery Store,Asian Restaurant,Seafood Restaurant,Fast Food Restaurant,Cantonese Restaurant
15,1.313183,Seafood Restaurant,Hotel Bar,Italian Restaurant,Convenience Store,Gym / Fitness Center,Movie Theater,Dance Studio,Café,Noodle House
23,1.294623,Food Court,Chinese Restaurant,Italian Restaurant,Spa,Seafood Restaurant,Stadium,Steakhouse,Café,Pool
31,1.335391,Chinese Restaurant,Food Court,Coffee Shop,Thai Restaurant,Snack Place,Steakhouse,Asian Restaurant,Café,Grocery Store


Cluster 4

In [41]:
singapore_merged.loc[singapore_merged['Cluster Labels'] == 3, singapore_merged.columns[[1] + list(range(5, singapore_merged.shape[1]))]]

Unnamed: 0,Latitude,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
18,1.28287,Grocery Store,Hotel,Seafood Restaurant,Dim Sum Restaurant,Gastropub,Coffee Shop,Spanish Restaurant,Café,Nail Salon


Cluster 5

In [42]:
singapore_merged.loc[singapore_merged['Cluster Labels'] == 4, singapore_merged.columns[[1] + list(range(5, singapore_merged.shape[1]))]]

Unnamed: 0,Latitude,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,1.349057,Chinese Restaurant,Food Court,Bus Station,Shopping Mall,Frozen Yogurt Shop,Bowling Alley,Multiplex,Malay Restaurant,Café
7,1.384749,Fast Food Restaurant,Bus Station,Asian Restaurant,Coffee Shop,Bus Stop,Bakery,Bowling Alley,Supermarket,Sandwich Place
11,1.370801,Chinese Restaurant,Fast Food Restaurant,Café,Coffee Shop,Supermarket,Pharmacy,Bus Station,Shopping Mall,Sandwich Place
21,1.337588,ATM,Basketball Court,Gym / Fitness Center,Gym,Flower Shop,Fast Food Restaurant,Noodle House,Dance Studio,Convenience Store
25,1.449093,Shopping Mall,Fast Food Restaurant,Chinese Restaurant,Japanese Restaurant,Smoke Shop,Bus Station,Food,Sporting Goods Shop,Bistro
26,1.391924,Fast Food Restaurant,Bus Station,Food Court,Coffee Shop,Sushi Restaurant,Snack Place,Sandwich Place,Thai Restaurant,Sculpture Garden
