<font size="4">Creating dataframe of Singaporean districts and their coordinates.</font>

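Let us first create a dataframe that contains the planning areas (districts) of Singapore and their coordinates. The area names are scraped from Wikipedia, and each name is then geocoded with Nominatim to obtain its latitude and longitude.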

In [1]:
import json # library to handle JSON files
import os # read API credentials from environment variables

import folium # map rendering library
import geocoder # import geocoder
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

# Wikipedia page that lists the planning areas of Singapore
url = "https://en.wikipedia.org/wiki/Planning_Areas_of_Singapore"
response = requests.get(url, timeout=30)
response.raise_for_status()  # stop here on an HTTP error instead of parsing an error page
data = response.text
soup = BeautifulSoup(data, "html5lib")
tables = soup.find_all('table')  # Find tables in the html

# One geocoder is enough for the whole loop; it does not need to be rebuilt per row.
geolocator = Nominatim(user_agent="sg_explorer")

# Collect plain records and build the dataframe once at the end
# (DataFrame.append was removed in pandas 2.0 and was quadratic anyway).
records = []
# NOTE(review): index 2 is assumed to be the planning-areas table; verify if the page layout changes.
for row in tables[2].tbody.find_all("tr"):
    col = row.td
    if col is None:
        # Header rows contain only <th> cells.
        continue
    planningArea = col.text.strip()
    address = '{}, SG'.format(planningArea)
    location = geolocator.geocode(address)
    if location is None:
        # geocode() returns None when nothing matches; skip instead of crashing.
        print("No geocoding result for {!r}; skipping.".format(planningArea))
        continue
    records.append({
        "Planning Area": planningArea,
        "Latitude": location.latitude,
        "Longitude": location.longitude,
    })

planningArea_data = pd.DataFrame(records, columns=["Planning Area", "Latitude", "Longitude"])
        

Create a map of Singapore and show all the districts.

In [2]:
address = 'Singapore'

# Get the latitude and longitude of Singapore to centre the map
geolocator = Nominatim(user_agent="sg_explorer")
location = geolocator.geocode(address)
if location is None:
    # geocode() returns None when the lookup finds nothing.
    raise ValueError("Could not geocode {!r}".format(address))
latitude = location.latitude
longitude = location.longitude

# create map of Singapore using latitude and longitude values
map_singapore = folium.Map(location=[latitude, longitude], zoom_start=10)

# add one circle marker per planning area, labelled with the area name
for lat, lng, neighborhood in zip(planningArea_data["Latitude"], planningArea_data["Longitude"], planningArea_data["Planning Area"]):
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        # parse_html belongs to the popup, not to the marker itself
        popup=folium.Popup(neighborhood, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_singapore)

map_singapore

Define Foursquare credentials

In [3]:
# Foursquare credentials are read from the environment so that secrets are
# never stored in the notebook. Set FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET before starting the kernel.
# NOTE(review): the key pair that used to be hard-coded in this cell has been
# exposed and should be revoked/rotated in the Foursquare developer console.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

if not (CLIENT_ID and CLIENT_SECRET):
    print('Warning: FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
          'the Foursquare requests below cannot be authenticated.')

<font size="4">Exploring the first district in the planningArea_data Dataframe</font>

In [4]:
# Report the planning area stored in the first row of the dataframe
print(f"The first district in the dataframe is {planningArea_data.loc[0, 'Planning Area']}.")

The first district in the dataframe is Ang Mo Kio.


In [5]:
neighborhood_latitude = planningArea_data.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = planningArea_data.loc[0, 'Longitude'] # neighborhood longitude value

latitude = neighborhood_latitude
longitude = neighborhood_longitude
radius = 500 # search radius passed to the API

# Let requests build and encode the query string instead of formatting the
# credentials into the URL by hand.
url = 'https://api.foursquare.com/v2/venues/explore'
params = {
    'client_id': CLIENT_ID,
    'client_secret': CLIENT_SECRET,
    'll': '{},{}'.format(latitude, longitude),
    'v': VERSION,
    'radius': radius,
    'limit': LIMIT,
}
response = requests.get(url, params=params, timeout=30)
response.raise_for_status()  # surface HTTP errors instead of failing later on a missing key
results = response.json()
results

{'meta': {'code': 200, 'requestId': '613172c16060e37eb4489b93'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Ang Mo Kio',
  'headerFullLocation': 'Ang Mo Kio, Singapore',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 41,
  'suggestedBounds': {'ne': {'lat': 1.3745803045000045,
    'lng': 103.85401568693963},
   'sw': {'lat': 1.3655802954999954, 'lng': 103.84502991306037}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4b15f661f964a52012b623e3',
       'name': 'FairPrice Xtra',
       'location': {'address': '#B2-26 AMK Hub',
        'crossStreet': '53 Ang Mo Kio Ave 3',
        'lat': 1.3692792884081397,
        'lng': 103.84888576818767,
        'labeledLat

In [6]:
# function that extracts the category of the venue
def get_category_type(row):
    """Return the name of the first category of a venue row, or None.

    The category list is read from ``row['categories']``; if that key is
    missing it is read from ``row['venue.categories']`` instead.

    Parameters
    ----------
    row : mapping-like (e.g. a dict or a pandas Series)
        Must provide one of the two keys above; each category entry is
        expected to be a mapping with a ``'name'`` key.

    Returns
    -------
    The ``'name'`` of the first category, or None when the list is empty
    or None.

    Raises
    ------
    KeyError
        If neither key is present in ``row``.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error is a
        # real problem and is allowed to propagate.
        categories_list = row['venue.categories']

    if not categories_list:
        return None
    return categories_list[0]['name']
    
# Venue records returned by the explore request
venues = results['response']['groups'][0]['items']

# Flatten the nested JSON and keep only name, categories and coordinates
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = pd.json_normalize(venues).loc[:, filtered_columns]

# Replace each category list with the name of its first entry
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# Keep only the last dotted component of every column label
nearby_venues.columns = [label.rsplit(".", 1)[-1] for label in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,FairPrice Xtra,Supermarket,1.369279,103.848886
1,Old Chang Kee,Snack Place,1.369094,103.848389
2,MOS Burger,Burger Joint,1.36917,103.847831
3,A&W,Fast Food Restaurant,1.369541,103.849043
4,Subway,Sandwich Place,1.369136,103.847612


<font size="4">Exploring the districts of Singapore</font>

First, define a function that can be used to search for venues in each district of Singapore.

In [7]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query the Foursquare explore endpoint once per location and tabulate the venues.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        Label and coordinates of each location to search around.
    radius : int, default 500
        Value sent as the ``radius`` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        'Venue Category' is None for a venue that has no categories.
        The frame is empty (but has these columns) when no venue is found.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.

    Uses the module-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # make the GET request; requests encodes the query parameters
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': LIMIT,
            },
            timeout=30,
        )
        response.raise_for_status()
        items = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            categories = venue.get('categories') or []
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # a venue without categories no longer raises IndexError
                categories[0]['name'] if categories else None,
            ))

    # Passing the columns to the constructor keeps the schema even with zero rows.
    return pd.DataFrame(venue_rows, columns=columns)

In [8]:
# Collect the venues around every planning area, then show the size and first rows
singapore_venues = getNearbyVenues(
    names=list(planningArea_data["Planning Area"]),
    latitudes=list(planningArea_data['Latitude']),
    longitudes=list(planningArea_data['Longitude']),
)
print(singapore_venues.shape)
singapore_venues.head()

Ang Mo Kio
Bedok
Bishan
Boon Lay
Bukit Batok
Bukit Merah
Bukit Panjang
Bukit Timah
Central Water Catchment
Changi
Changi Bay
Choa Chu Kang
Clementi
Downtown Core
Geylang
Hougang
Jurong East
Jurong West
Kallang
Lim Chu Kang
Mandai
Marina East
Marina South
Marine Parade
Museum
Newton
North-Eastern Islands
Novena
Orchard
Outram
Pasir Ris
Paya Lebar
Pioneer
Punggol
Queenstown
River Valley
Rochor
Seletar
Sembawang
Sengkang
Serangoon
Simpang
Singapore River
Southern Islands
Straits View
Sungei Kadut
Tampines
Tanglin
Tengah
Toa Payoh
Tuas
Western Islands
Western Water Catchment
Woodlands
Yishun
(1675, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Ang Mo Kio,1.37008,103.849523,FairPrice Xtra,1.369279,103.848886,Supermarket
1,Ang Mo Kio,1.37008,103.849523,Old Chang Kee,1.369094,103.848389,Snack Place
2,Ang Mo Kio,1.37008,103.849523,MOS Burger,1.36917,103.847831,Burger Joint
3,Ang Mo Kio,1.37008,103.849523,A&W,1.369541,103.849043,Fast Food Restaurant
4,Ang Mo Kio,1.37008,103.849523,Subway,1.369136,103.847612,Sandwich Place


This dataframe contains over 1600 venues. Let us check the number of venues found in each district.

In [9]:
# Count the non-null entries of every column for each neighborhood
singapore_venues.groupby(by='Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Ang Mo Kio,41,41,41,41,41,41
Bedok,58,58,58,58,58,58
Bishan,43,43,43,43,43,43
Boon Lay,70,70,70,70,70,70
Bukit Batok,19,19,19,19,19,19
Bukit Merah,21,21,21,21,21,21
Bukit Panjang,57,57,57,57,57,57
Bukit Timah,11,11,11,11,11,11
Central Water Catchment,3,3,3,3,3,3
Changi,7,7,7,7,7,7


<font size="4">Types of venues in each neighborhood.</font>

Next, we would like to see what kinds of venues are most common in each Singapore neighborhood. First, we one-hot encode the category of each venue.

In [10]:
# one hot encoding of the venue categories (one indicator column per category)
singapore_onehot = (
    pd.get_dummies(singapore_venues[['Venue Category']], prefix="", prefix_sep="")
    # A venue category literally named "Neighborhood" would clash with the
    # label column inserted below. errors='ignore' makes the drop a no-op
    # when no such category exists instead of raising KeyError.
    .drop(columns=['Neighborhood'], errors='ignore')
)
# put the neighborhood label in front of the indicator columns
singapore_onehot.insert(loc=0, column='Neighborhood', value=singapore_venues['Neighborhood'])


Next, group the venues by their neighborhoods and calculate the mean frequency for each venue category.

In [11]:
# Average the indicator columns per neighborhood, giving the share of each category
singapore_grouped = (
    singapore_onehot
    .groupby('Neighborhood')
    .mean()
    .reset_index()
)
singapore_grouped

Unnamed: 0,Neighborhood,ATM,Accessories Store,Airport,Airport Terminal,American Restaurant,Arcade,Art Gallery,Art Museum,Arts & Crafts Store,...,Video Game Store,Video Store,Vietnamese Restaurant,Water Park,Waterfront,Whisky Bar,Wine Shop,Wings Joint,Yoga Studio,Zoo Exhibit
0,Ang Mo Kio,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Bedok,0.0,0.0,0.0,0.0,0.017241,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017241,0.0,0.0
2,Bishan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Boon Lay,0.0,0.0,0.0,0.0,0.014286,0.0,0.0,0.0,0.0,...,0.0,0.014286,0.0,0.0,0.0,0.0,0.0,0.014286,0.0,0.0
4,Bukit Batok,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Bukit Merah,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Bukit Panjang,0.0,0.0,0.0,0.0,0.035088,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.0,0.0
7,Bukit Timah,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Central Water Catchment,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Changi,0.0,0.0,0.285714,0.285714,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


We can look at the top five most common venues for each neighborhood.

In [12]:
num_top_venues = 5

# Walk the grouped frame row by row instead of re-filtering and transposing it
# for every neighborhood, and build each small table directly so that no
# column is assigned into a slice (which triggers a chained-assignment warning).
for _, grouped_row in singapore_grouped.iterrows():
    hood = grouped_row['Neighborhood']
    print("----"+hood+"----")
    # Everything after the first column ('Neighborhood') is a category frequency.
    temp = pd.DataFrame({
        'venue': grouped_row.index[1:],
        'freq': grouped_row.iloc[1:].astype(float).to_numpy(),
    })
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Ang Mo Kio----
             venue  freq
0      Coffee Shop  0.10
1  Bubble Tea Shop  0.07
2       Food Court  0.07
3     Dessert Shop  0.07
4      Supermarket  0.05


----Bedok----
                venue  freq
0         Coffee Shop  0.07
1          Food Court  0.05
2        Noodle House  0.05
3  Chinese Restaurant  0.05
4    Asian Restaurant  0.05


----Bishan----
              venue  freq
0        Food Court  0.09
1       Coffee Shop  0.09
2    Ice Cream Shop  0.07
3   Bubble Tea Shop  0.07
4  Asian Restaurant  0.05


----Boon Lay----
                  venue  freq
0   Japanese Restaurant  0.11
1      Asian Restaurant  0.10
2  Fast Food Restaurant  0.09
3    Chinese Restaurant  0.06
4          Dessert Shop  0.06


----Bukit Batok----
                           venue  freq
0                    Coffee Shop  0.21
1                     Food Court  0.11
2             Chinese Restaurant  0.11
3  Vegetarian / Vegan Restaurant  0.05
4                           Café  0.05


----Bukit Merah--

This information can also be put into a dataframe showing the top ten most common venues for each Singaporean neighborhood.

In [13]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the ``num_top_venues`` largest entries of ``row``.

    The first element of ``row`` is skipped; the remaining entries are
    sorted in descending order and the labels of the leading ones are
    returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    # The first three ranks take 'st'/'nd'/'rd'; every later rank takes 'th'.
    # An explicit bounds check replaces the former bare except around the lookup.
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# create a new dataframe with one row per neighborhood
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = singapore_grouped['Neighborhood']

# fill the ranked category names row by row
for ind in np.arange(singapore_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(singapore_grouped.iloc[ind, :], num_top_venues)

In [14]:
# Display the table of most common venue categories per neighborhood
neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Ang Mo Kio,Coffee Shop,Dessert Shop,Food Court,Bubble Tea Shop,Supermarket,Japanese Restaurant,Snack Place,Ramen Restaurant,Noodle House,Fast Food Restaurant
1,Bedok,Coffee Shop,Noodle House,Asian Restaurant,Food Court,Sandwich Place,Japanese Restaurant,Chinese Restaurant,Thrift / Vintage Store,Supermarket,Dessert Shop
2,Bishan,Food Court,Coffee Shop,Bubble Tea Shop,Ice Cream Shop,Cosmetics Shop,Japanese Restaurant,Café,Asian Restaurant,Chinese Restaurant,Supermarket
3,Boon Lay,Japanese Restaurant,Asian Restaurant,Fast Food Restaurant,Dessert Shop,Chinese Restaurant,Coffee Shop,Bus Station,Indian Restaurant,Café,Cosmetics Shop
4,Bukit Batok,Coffee Shop,Food Court,Chinese Restaurant,Department Store,Fast Food Restaurant,Frozen Yogurt Shop,Shopping Mall,Bowling Alley,Café,Malay Restaurant
5,Bukit Merah,Chinese Restaurant,Food Court,Karaoke Bar,Gym,Café,Pool,Club House,Park,Bowling Alley,Cable Car
6,Bukit Panjang,Sushi Restaurant,Korean Restaurant,Fast Food Restaurant,Asian Restaurant,Chinese Restaurant,Bubble Tea Shop,Supermarket,Shopping Mall,Bus Station,Seafood Restaurant
7,Bukit Timah,Trail,Scenic Lookout,Rest Area,Hill,Zoo Exhibit,Farmers Market,Fried Chicken Joint,French Restaurant,Food Stand,Food Service
8,Central Water Catchment,Reservoir,Business Service,Zoo Exhibit,Filipino Restaurant,Furniture / Home Store,Frozen Yogurt Shop,Fried Chicken Joint,French Restaurant,Food Stand,Food Service
9,Changi,Airport,Airport Terminal,Hotel,Rest Area,Food Court,Zoo Exhibit,Filipino Restaurant,Furniture / Home Store,Frozen Yogurt Shop,Fried Chicken Joint


Next, we would like to rank the neighborhoods by the number of gyms they contain.

In [21]:
# Keep the venues labelled with either gym category and count them per neighborhood
singapore_venues[
    singapore_venues["Venue Category"].isin(['Gym', 'Gym / Fitness Center'])
].groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Ang Mo Kio,2,2,2,2,2,2
Bishan,1,1,1,1,1,1
Boon Lay,1,1,1,1,1,1
Bukit Merah,2,2,2,2,2,2
Bukit Panjang,1,1,1,1,1,1
Clementi,2,2,2,2,2,2
Downtown Core,3,3,3,3,3,3
Hougang,1,1,1,1,1,1
Jurong East,1,1,1,1,1,1
Jurong West,1,1,1,1,1,1


In [25]:
# Number of gym venues per neighborhood, largest first
singapore_venues[
    singapore_venues["Venue Category"].isin(['Gym', 'Gym / Fitness Center'])
].value_counts(subset=['Neighborhood'])

Neighborhood   
Tampines           5
Singapore River    3
Downtown Core      3
Ang Mo Kio         2
Bukit Merah        2
Clementi           2
Jurong East        1
Bishan             1
Boon Lay           1
Bukit Panjang      1
Hougang            1
Yishun             1
Woodlands          1
Museum             1
Newton             1
Novena             1
Pasir Ris          1
Pioneer            1
Sengkang           1
Jurong West        1
dtype: int64

In [34]:
# Same gym counts, converted to a dataframe with the neighborhood as a regular column
(
    singapore_venues[singapore_venues["Venue Category"].isin(['Gym', 'Gym / Fitness Center'])]
    .value_counts(subset=['Neighborhood'])
    .to_frame()
    .reset_index()
)

Unnamed: 0,Neighborhood,0
0,Tampines,5
1,Singapore River,3
2,Downtown Core,3
3,Ang Mo Kio,2
4,Bukit Merah,2
5,Clementi,2
6,Jurong East,1
7,Bishan,1
8,Boon Lay,1
9,Bukit Panjang,1


In [46]:
# Names of the six neighborhoods with the most gym venues
neighborhoodsMostGyms = (
    singapore_venues[singapore_venues["Venue Category"].isin(['Gym', 'Gym / Fitness Center'])]
    .value_counts(subset=['Neighborhood'])
    .to_frame()
    .reset_index()['Neighborhood'][0:6]
)
# Iterate over the values directly; Series.iteritems() was removed in pandas 2.0.
for value in neighborhoodsMostGyms:
    print(value)

Tampines
Singapore River
Downtown Core
Ang Mo Kio
Bukit Merah
Clementi


In [50]:
# Latitude of the row(s) whose planning area is 'Tampines' (returned as a Series)
planningArea_data.loc[planningArea_data['Planning Area'] == 'Tampines', 'Latitude']

46    1.354653
Name: Latitude, dtype: float64

In [53]:
# Longitude of the row(s) whose planning area is 'Tampines' (returned as a Series)
planningArea_data.loc[planningArea_data['Planning Area'] == 'Tampines', 'Longitude']

46    103.943571
Name: Longitude, dtype: float64

In [55]:
# Display the planning-area coordinates table
planningArea_data

Unnamed: 0,Planning Area,Latitude,Longitude
0,Ang Mo Kio,1.37008,103.849523
1,Bedok,1.323976,103.930216
2,Bishan,1.350986,103.848255
3,Boon Lay,1.33855,103.705812
4,Bukit Batok,1.349057,103.749591
5,Bukit Merah,1.274864,103.820276
6,Bukit Panjang,1.377917,103.763095
7,Bukit Timah,1.35469,103.776372
8,Central Water Catchment,1.370059,103.803448
9,Changi,1.35108,103.990064


In [66]:
address = 'Singapore'

# Get the latitude and longitude of Singapore to centre the map
geolocator = Nominatim(user_agent="sg_explorer")
location = geolocator.geocode(address)
if location is None:
    # geocode() returns None when the lookup finds nothing.
    raise ValueError("Could not geocode {!r}".format(address))
latitude = location.latitude
longitude = location.longitude

# create map of Singapore using latitude and longitude values
map_singapore = folium.Map(location=[latitude, longitude], zoom_start=10)

# Names of the six neighborhoods with the most gym venues
neighborhoodsMostGyms = (
    singapore_venues[singapore_venues["Venue Category"].isin(['Gym', 'Gym / Fitness Center'])]
    .value_counts(subset=['Neighborhood'])
    .to_frame()
    .reset_index()['Neighborhood'][0:6]
)

# add one marker per top neighborhood
# (iterate over the values directly; Series.iteritems() was removed in pandas 2.0)
for neighborhood in neighborhoodsMostGyms:
    # Look the planning area up once and read both coordinates from that row.
    area_row = planningArea_data.loc[planningArea_data['Planning Area'] == neighborhood].iloc[0]
    lat = area_row['Latitude']
    lng = area_row['Longitude']
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        # parse_html belongs to the popup, not to the marker itself
        popup=folium.Popup(neighborhood, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_singapore)

map_singapore