Use the Foursquare API to explore neighborhoods in Toronto. Use the **explore** function to get the most common venue categories in each neighborhood, and then use this feature to group the neighborhoods into clusters. Use the *k*-means clustering algorithm to complete this task. Finally, use the Folium library to visualize the neighborhoods in Toronto and their emerging clusters.

To simplify things only include Toronto boroughs that contain the word Toronto.

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files


In [2]:
# Following two lines not required
#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
# from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

In [3]:
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

In [4]:
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans


In [5]:
#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
#!conda install -c conda-forge folium --yes 
import folium # map rendering library

print('Libraries imported.')


Libraries imported.


In [6]:
# #### Load and explore the Toronto Neighbourhood data --- created eariler in this assignment
# dfTO = pd.read_csv('Toronto Neighborhoods.csv').drop(['Unnamed: 0'],axis=1)
dfTO = pd.read_csv('Toronto Neighborhoods.csv')
dfTO.head(5)

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [7]:
criteria = (dfTO['Borough'].str.contains('Toronto'))
dfTO2 = dfTO[criteria]

In [8]:
# Confirm PostalCode, Borough, Neighourhood, latitude and longitude info are there
dfTO2.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
37,M4E,East Toronto,The Beaches,43.676357,-79.293031
41,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
42,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572
43,M4M,East Toronto,Studio District,43.659526,-79.340923
44,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


In [9]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(dfTO2['Borough'].unique()),
        dfTO2.shape[0]
    )
)


The dataframe has 4 boroughs and 38 neighborhoods.


In [10]:
# Create a map of Toronto with neighborhoods superimposed on top.

latitude = dfTO.loc[dfTO['PostalCode'] == 'M5J']['Latitude']
longitude = dfTO.loc[dfTO['PostalCode'] == 'M5J']['Longitude']
# df.loc[df['column_name'] == some_value]
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=14)

for row in dfTO2.itertuples(index=False):
    lat = row[3]  # Latitude
    lng = row[4]  # Longitude
    borough = row[1]  # Borough
    neighborhood = row[2]    # Neighborhood

# for lat, lng, borough, neighborhood in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Borough'], neighborhoods['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
#    label = folium.Popup(label)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=True).add_to(map_toronto)


display(map_toronto)
# map_toronto.save("test_map.html")

To further simplify the analysis, from this point focus on neighborhoods around Union Station in downtown. Specifically, only for Postal Codes M5J, M5B , M5G, M5K and M5L

In [11]:
# Create dataframe with only areas around Union Station

# DT_data = dfTO[dfTO['PostalCode']=='M5J'].reset_index(drop=True, inplace=True)
DT_creteria = ((dfTO['PostalCode']=='M5J') | (dfTO['PostalCode']=='M5B') | (dfTO['PostalCode']=='M5G') | (dfTO['PostalCode']=='M5K') | (dfTO['PostalCode']=='M5L'))

#DT_creteria = ((dfTO['PostalCode']=='M5J') | (dfTO['PostalCode']=='M5B'))
DT_data = dfTO[DT_creteria].reset_index(drop=True)
#DT_data=DT_data.drop(['index'],axis=1)
DT_data

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937
1,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
2,M5J,Downtown Toronto,"Harbourfront East, Toronto Islands, Union Station",43.640816,-79.381752
3,M5K,Downtown Toronto,"Design Exchange, Toronto Dominion Centre",43.647177,-79.381576
4,M5L,Downtown Toronto,"Commerce Court, Victoria Hotel",43.648198,-79.379817


In [12]:
# Create map centering on Union Station using location data for Postal Code M5J

lat = dfTO.loc[dfTO['PostalCode'] == 'M5J']['Latitude'].to_string(index=False)
lng = dfTO.loc[dfTO['PostalCode'] == 'M5J']['Longitude'].to_string(index=False)

#lat = DT_data['Latitude'].to_string(index=False)
#lng = DT_data['Longitude'].to_string(index=False)

map_union = folium.Map(location=[lat, lng], zoom_start=13)

# add markers to map
for lat, lng, label in zip(DT_data['Latitude'], DT_data['Longitude'], DT_data['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_union)

display(map_union)

Start utilizing the Foursquare API to explore the neighborhoods and segment them

In [13]:
# Define Foursquare Credentials and Version

CLIENT_ID = 'REDACTED' # your Foursquare ID
CLIENT_SECRET = 'REDACTED' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version


In [14]:
DT_data.loc[0, 'Neighbourhood']
neighborhood_latitude = lat # neighborhood latitude value
neighborhood_longitude = lng # neighborhood longitude value
neighborhood_name = DT_data['Neighbourhood'].to_string(index=False) # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))


Latitude and longitude values of                           Ryerson, Garden District
                                Central Bay Street
 Harbourfront East, Toronto Islands, Union Station
          Design Exchange, Toronto Dominion Centre
                    Commerce Court, Victoria Hotel are 43.6481985, -79.37981690000002.


In [15]:
# Get the top 100 venues that are around Union Station within a radius of 500 meters.

In [16]:
# create the request URL
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 #define radius
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
# url # display URL

In [17]:
# Send the GET request and examine the resutls

results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5ce3389ddd57970e2345c813'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Financial District',
  'headerFullLocation': 'Financial District, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 185,
  'suggestedBounds': {'ne': {'lat': 43.6526985045, 'lng': -79.37360952665503},
   'sw': {'lat': 43.6436984955, 'lng': -79.38602427334501}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '501ae947e4b0d11883b910a7',
       'name': 'Equinox Bay Street',
       'location': {'address': '199 Bay St',
        'crossStreet': 'at Commerce Court West, PATH Level',
        'lat': 43.64809974034856,
        'lng': -79.37998869411526,
    

In [18]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']


In [19]:
# Now we are ready to clean the json and structure it into a *pandas* dataframe
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()


Unnamed: 0,name,categories,lat,lng
0,Equinox Bay Street,Gym,43.6481,-79.379989
1,Canoe,Restaurant,43.647452,-79.38132
2,Mos Mos Coffee,Café,43.648159,-79.378745
3,Walrus Pub & Beer Hall,Pub,43.647375,-79.379515
4,Maman,Café,43.648309,-79.382253


In [20]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

100 venues were returned by Foursquare.


In [21]:
# Create a function to repeat the same process to all the neighborhoods around Union Station
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)



In [22]:
union_venues = getNearbyVenues(names=DT_data['Neighbourhood'],
                                   latitudes=DT_data['Latitude'],
                                   longitudes=DT_data['Longitude']
                                  )


Ryerson, Garden District
Central Bay Street
Harbourfront East, Toronto Islands, Union Station
Design Exchange, Toronto Dominion Centre
Commerce Court, Victoria Hotel


In [23]:
print(union_venues.shape)
union_venues.head()


(487, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Ryerson, Garden District",43.657162,-79.378937,UNIQLO ユニクロ,43.65591,-79.380641,Clothing Store
1,"Ryerson, Garden District",43.657162,-79.378937,Blaze Pizza,43.656518,-79.380015,Pizza Place
2,"Ryerson, Garden District",43.657162,-79.378937,Burrito Boyz,43.656265,-79.378343,Burrito Place
3,"Ryerson, Garden District",43.657162,-79.378937,Silver Snail Comics,43.657031,-79.381403,Comic Shop
4,"Ryerson, Garden District",43.657162,-79.378937,Page One Cafe,43.657772,-79.376073,Café


In [24]:
# Let's check how many venues were returned for each neighborhood

union_venues.groupby('Neighborhood').count()


Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Central Bay Street,87,87,87,87,87,87
"Commerce Court, Victoria Hotel",100,100,100,100,100,100
"Design Exchange, Toronto Dominion Centre",100,100,100,100,100,100
"Harbourfront East, Toronto Islands, Union Station",100,100,100,100,100,100
"Ryerson, Garden District",100,100,100,100,100,100


In [25]:
# Find out how many unique categories can be curated from all the returned venues

print('There are {} uniques categories.'.format(len(union_venues['Venue Category'].unique())))

There are 121 uniques categories.


In [26]:
# Analyze The Neighborhood
# one hot encoding
union_onehot = pd.get_dummies(union_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
union_onehot['Neighborhood'] = union_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [union_onehot.columns[-1]] + list(union_onehot.columns[:-1])
union_onehot = union_onehot[fixed_columns]

union_onehot.head()

Unnamed: 0,Yoga Studio,American Restaurant,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Bakery,Bank,Bar,Baseball Stadium,Basketball Stadium,Beer Bar,Bistro,Bookstore,Breakfast Spot,Brewery,Bubble Tea Shop,Burger Joint,Burrito Place,Business Service,Café,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,College Rec Center,Comic Shop,Concert Hall,Convenience Store,Cosmetics Shop,Creperie,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store,Donut Shop,Ethiopian Restaurant,Event Space,Falafel Restaurant,Fast Food Restaurant,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Gastropub,Gluten-free Restaurant,Greek Restaurant,Gym,Gym / Fitness Center,History Museum,Hookah Bar,Hotel,Hotel Bar,Ice Cream Shop,Indian Restaurant,Irish Pub,Italian Restaurant,Japanese Restaurant,Jazz Club,Juice Bar,Korean Restaurant,Lake,Latin American Restaurant,Lingerie Store,Lounge,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Modern European Restaurant,Monument / Landmark,Movie Theater,Museum,Music Venue,Neighborhood,New American Restaurant,Office,Other Great Outdoors,Park,Performing Arts Venue,Pizza Place,Plaza,Poke Place,Portuguese Restaurant,Poutine Place,Pub,Ramen Restaurant,Restaurant,Salad Place,Sandwich Place,Scenic Lookout,Seafood Restaurant,Shoe Store,Shopping Mall,Skating Rink,Smoke Shop,Smoothie Shop,Soup Place,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Steakhouse,Supermarket,Sushi Restaurant,Taco Place,Tailor Shop,Tanning Salon,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"Ryerson, Garden District",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"Ryerson, Garden District",0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"Ryerson, Garden District",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"Ryerson, Garden District",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"Ryerson, Garden District",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [27]:
# Examine the new dataframe size.

union_onehot.shape

(487, 121)

In [28]:
# Group rows by neighborhood and by taking the mean of the frequency of occurrence of each category


union_grouped = union_onehot.groupby('Neighborhood').mean().reset_index()
union_grouped


Unnamed: 0,Neighborhood,Yoga Studio,American Restaurant,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Bakery,Bank,Bar,Baseball Stadium,Basketball Stadium,Beer Bar,Bistro,Bookstore,Breakfast Spot,Brewery,Bubble Tea Shop,Burger Joint,Burrito Place,Business Service,Café,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,College Rec Center,Comic Shop,Concert Hall,Convenience Store,Cosmetics Shop,Creperie,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store,Donut Shop,Ethiopian Restaurant,Event Space,Falafel Restaurant,Fast Food Restaurant,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Gastropub,Gluten-free Restaurant,Greek Restaurant,Gym,Gym / Fitness Center,History Museum,Hookah Bar,Hotel,Hotel Bar,Ice Cream Shop,Indian Restaurant,Irish Pub,Italian Restaurant,Japanese Restaurant,Jazz Club,Juice Bar,Korean Restaurant,Lake,Latin American Restaurant,Lingerie Store,Lounge,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Modern European Restaurant,Monument / Landmark,Movie Theater,Museum,Music Venue,New American Restaurant,Office,Other Great Outdoors,Park,Performing Arts Venue,Pizza Place,Plaza,Poke Place,Portuguese Restaurant,Poutine Place,Pub,Ramen Restaurant,Restaurant,Salad Place,Sandwich Place,Scenic Lookout,Seafood Restaurant,Shoe Store,Shopping Mall,Skating Rink,Smoke Shop,Smoothie Shop,Soup Place,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Steakhouse,Supermarket,Sushi Restaurant,Taco Place,Tailor Shop,Tanning Salon,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar
0,Central Bay Street,0.011494,0.011494,0.0,0.0,0.011494,0.011494,0.0,0.022989,0.0,0.022989,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022989,0.034483,0.0,0.011494,0.045977,0.022989,0.011494,0.0,0.16092,0.0,0.011494,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011494,0.011494,0.011494,0.011494,0.011494,0.0,0.0,0.011494,0.0,0.0,0.0,0.011494,0.011494,0.011494,0.011494,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,0.0,0.022989,0.022989,0.0,0.045977,0.022989,0.0,0.011494,0.011494,0.0,0.0,0.0,0.0,0.0,0.022989,0.011494,0.011494,0.0,0.0,0.0,0.0,0.0,0.011494,0.0,0.011494,0.0,0.011494,0.0,0.011494,0.011494,0.0,0.0,0.011494,0.022989,0.022989,0.034483,0.0,0.011494,0.0,0.0,0.0,0.0,0.011494,0.0,0.022989,0.0,0.0,0.0,0.011494,0.0,0.022989,0.0,0.0,0.0,0.011494,0.011494,0.0,0.0,0.0,0.011494,0.0,0.0,0.011494
1,"Commerce Court, Victoria Hotel",0.0,0.04,0.0,0.01,0.0,0.0,0.0,0.03,0.01,0.02,0.0,0.0,0.02,0.0,0.01,0.01,0.0,0.0,0.01,0.0,0.0,0.07,0.0,0.0,0.01,0.11,0.0,0.0,0.01,0.0,0.0,0.01,0.01,0.0,0.03,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.01,0.01,0.01,0.0,0.03,0.01,0.01,0.02,0.01,0.0,0.0,0.06,0.0,0.01,0.0,0.01,0.03,0.02,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.0,0.01,0.0,0.0,0.01,0.0,0.05,0.02,0.01,0.0,0.03,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.01,0.0,0.02,0.02,0.0,0.0,0.0,0.01,0.0,0.0,0.01
2,"Design Exchange, Toronto Dominion Centre",0.0,0.03,0.0,0.01,0.0,0.0,0.01,0.03,0.0,0.02,0.0,0.01,0.02,0.0,0.01,0.01,0.0,0.0,0.02,0.0,0.0,0.07,0.01,0.0,0.01,0.12,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.01,0.0,0.03,0.01,0.01,0.02,0.01,0.0,0.0,0.08,0.01,0.01,0.0,0.0,0.03,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.01,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.01,0.0,0.05,0.01,0.01,0.0,0.03,0.0,0.01,0.0,0.0,0.0,0.01,0.01,0.01,0.01,0.01,0.03,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.0,0.01,0.01,0.0,0.0,0.01
3,"Harbourfront East, Toronto Islands, Union Station",0.0,0.0,0.05,0.01,0.0,0.0,0.0,0.03,0.01,0.02,0.02,0.01,0.01,0.01,0.0,0.0,0.03,0.01,0.0,0.0,0.0,0.04,0.01,0.0,0.0,0.12,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.05,0.01,0.01,0.01,0.0,0.04,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.02,0.01,0.01,0.0,0.02,0.01,0.03,0.01,0.0,0.0,0.0,0.0,0.0,0.03,0.01,0.01,0.03,0.01,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.02,0.02,0.01,0.01,0.01,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.01,0.01,0.0,0.0,0.01
4,"Ryerson, Garden District",0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.02,0.01,0.01,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.02,0.01,0.01,0.0,0.04,0.01,0.06,0.0,0.1,0.01,0.01,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.01,0.01,0.01,0.0,0.0,0.01,0.0,0.0,0.02,0.01,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.01,0.01,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.02,0.02,0.0,0.01,0.0,0.01,0.0,0.01,0.01,0.01,0.03,0.01,0.01,0.0,0.01,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.02,0.01,0.0,0.0,0.01,0.01,0.02,0.02,0.0,0.01,0.0,0.01,0.01,0.01,0.0,0.01,0.0,0.0,0.01,0.0,0.02,0.0,0.01,0.0,0.01,0.01,0.0,0.01,0.02,0.01,0.02,0.01,0.0,0.01,0.01,0.01,0.01


In [29]:
# Confirm the new size

union_grouped.shape

(5, 121)

In [30]:
# Print each neighborhood along with the top 5 most common venues

num_top_venues = 5

for hood in union_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = union_grouped[union_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')


----Central Bay Street----
                venue  freq
0         Coffee Shop  0.16
1                Café  0.05
2  Italian Restaurant  0.05
3        Burger Joint  0.03
4      Sandwich Place  0.03


----Commerce Court, Victoria Hotel----
                 venue  freq
0          Coffee Shop  0.11
1                 Café  0.07
2                Hotel  0.06
3           Restaurant  0.05
4  American Restaurant  0.04


----Design Exchange, Toronto Dominion Centre----
         venue  freq
0  Coffee Shop  0.12
1        Hotel  0.08
2         Café  0.07
3   Restaurant  0.05
4   Steakhouse  0.03


----Harbourfront East, Toronto Islands, Union Station----
                venue  freq
0         Coffee Shop  0.12
1            Aquarium  0.05
2               Hotel  0.05
3                Café  0.04
4  Italian Restaurant  0.04


----Ryerson, Garden District----
                       venue  freq
0                Coffee Shop  0.10
1             Clothing Store  0.06
2                       Café  0.04
3         

In [31]:
# Write a function to sort the venues in descending order.


def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]



In [32]:
# Create a new dataframe and display the top 10 venues for the neighborhood.

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = union_grouped['Neighborhood']

for ind in np.arange(union_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(union_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Central Bay Street,Coffee Shop,Italian Restaurant,Café,Sandwich Place,Burger Joint,Indian Restaurant,Japanese Restaurant,Salad Place,Restaurant,Spa
1,"Commerce Court, Victoria Hotel",Coffee Shop,Café,Hotel,Restaurant,American Restaurant,Bakery,Seafood Restaurant,Italian Restaurant,Steakhouse,Deli / Bodega
2,"Design Exchange, Toronto Dominion Centre",Coffee Shop,Hotel,Café,Restaurant,Italian Restaurant,Steakhouse,American Restaurant,Seafood Restaurant,Gastropub,Deli / Bodega
3,"Harbourfront East, Toronto Islands, Union Station",Coffee Shop,Aquarium,Hotel,Italian Restaurant,Café,Pizza Place,Brewery,Restaurant,Fried Chicken Joint,Bakery
4,"Ryerson, Garden District",Coffee Shop,Clothing Store,Café,Cosmetics Shop,Middle Eastern Restaurant,Bakery,Pizza Place,Ramen Restaurant,Restaurant,Japanese Restaurant


In [33]:
# Cluster Neighborhoods

# Run *k*-means to cluster the neighborhood into 5 clusters.

# set number of clusters
kclusters = 5

union_grouped_clustering = union_grouped.drop('Neighborhood', 1)


In [34]:
# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(union_grouped_clustering)


In [35]:
# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([2, 4, 1, 3, 0])

In [36]:
# Create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

union_merged = DT_data


In [37]:
# merge union_grouped with toronto_data to add latitude/longitude for each neighborhood
union_merged = union_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighbourhood')

union_merged.head() # check the last columns!


Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937,0,Coffee Shop,Clothing Store,Café,Cosmetics Shop,Middle Eastern Restaurant,Bakery,Pizza Place,Ramen Restaurant,Restaurant,Japanese Restaurant
1,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383,2,Coffee Shop,Italian Restaurant,Café,Sandwich Place,Burger Joint,Indian Restaurant,Japanese Restaurant,Salad Place,Restaurant,Spa
2,M5J,Downtown Toronto,"Harbourfront East, Toronto Islands, Union Station",43.640816,-79.381752,3,Coffee Shop,Aquarium,Hotel,Italian Restaurant,Café,Pizza Place,Brewery,Restaurant,Fried Chicken Joint,Bakery
3,M5K,Downtown Toronto,"Design Exchange, Toronto Dominion Centre",43.647177,-79.381576,1,Coffee Shop,Hotel,Café,Restaurant,Italian Restaurant,Steakhouse,American Restaurant,Seafood Restaurant,Gastropub,Deli / Bodega
4,M5L,Downtown Toronto,"Commerce Court, Victoria Hotel",43.648198,-79.379817,4,Coffee Shop,Café,Hotel,Restaurant,American Restaurant,Bakery,Seafood Restaurant,Italian Restaurant,Steakhouse,Deli / Bodega


In [38]:
# Finally, let's visualize the resulting clusters

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(union_merged['Latitude'], union_merged['Longitude'], union_merged['Neighbourhood'], union_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters
