In [1]:
from bs4 import BeautifulSoup
import requests

In [2]:
import numpy as np
import pandas as pd

In [3]:
source = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M").text

In [4]:
soup = BeautifulSoup(source,'lxml')

In [5]:
container = soup.find('div',class_='mw-content-ltr')
#find class containing postcode table tags
tables = container.find('table',class_='wikitable sortable')

In [6]:
#Derive and store tag values
data = [[]]
cnt = 0
rows=tables.findAll("tr")
for row in rows:
    cnt = cnt + 1
    cells = row.findAll(["td","th"])
    final = []
    for j in range(len(cells)):
        cell = cells[j]
        txt = cell.text.rstrip()
        final.append(txt)
    if 'Not assigned' in final[1]:
        final=[]
    data.insert(cnt,final)

In [7]:
#Remove empty lists
list2 = [x for x in data if x]

In [8]:
columns = list2[0]
pcdf = pd.DataFrame(list2[1:])
pcdf.columns = columns
#Assign Borough values to Neighbourhood in case of Not assigned
pcdf.loc[pcdf['Neighbourhood'] == 'Not assigned', 'Neighbourhood'] = pcdf['Borough']

In [9]:
final_df = pd.DataFrame(pcdf.groupby(['Postcode','Borough'],as_index=False)['Neighbourhood'].apply(','.join)).reset_index()

In [10]:
final_df.columns = columns

In [11]:
#Number of rows
print(final_df.shape[0])

103


In [12]:
geo_codes_df = pd.read_csv("Geospatial_Coordinates.csv")

In [13]:
merged_df = pd.merge(final_df,geo_codes_df,
                  how='inner', left_on=['Postcode'], right_on = ['Postal Code'])

In [14]:
merged_df

Unnamed: 0,Postcode,Borough,Neighbourhood,Postal Code,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",M1B,43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",M1C,43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",M1E,43.763573,-79.188711
3,M1G,Scarborough,Woburn,M1G,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,M1H,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,M1J,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park,Ionview,Kennedy Park",M1K,43.727929,-79.262029
7,M1L,Scarborough,"Clairlea,Golden Mile,Oakridge",M1L,43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest,Cliffside,Scarborough Village West",M1M,43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff,Cliffside West",M1N,43.692657,-79.264848


In [15]:
df_t = merged_df.loc[merged_df['Borough'].isin(['Downtown Toronto'])].reset_index(drop=True)

In [16]:
df_t

Unnamed: 0,Postcode,Borough,Neighbourhood,Postal Code,Latitude,Longitude
0,M4W,Downtown Toronto,Rosedale,M4W,43.679563,-79.377529
1,M4X,Downtown Toronto,"Cabbagetown,St. James Town",M4X,43.667967,-79.367675
2,M4Y,Downtown Toronto,Church and Wellesley,M4Y,43.66586,-79.38316
3,M5A,Downtown Toronto,"Harbourfront,Regent Park",M5A,43.65426,-79.360636
4,M5B,Downtown Toronto,"Ryerson,Garden District",M5B,43.657162,-79.378937
5,M5C,Downtown Toronto,St. James Town,M5C,43.651494,-79.375418
6,M5E,Downtown Toronto,Berczy Park,M5E,43.644771,-79.373306
7,M5G,Downtown Toronto,Central Bay Street,M5G,43.657952,-79.387383
8,M5H,Downtown Toronto,"Adelaide,King,Richmond",M5H,43.650571,-79.384568
9,M5J,Downtown Toronto,"Harbourfront East,Toronto Islands,Union Station",M5J,43.640816,-79.381752


In [17]:
import folium
from pandas.io.json import json_normalize

Explore Areas in Toronto

In [18]:
map_toronto = folium.Map(location=[43.679563, -79.377529], zoom_start=10)

# add markers to map
for lat, lng, borough, neighbourhood in zip(df_t['Latitude'], df_t['Longitude'], df_t['Borough'], df_t['Neighbourhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7
        ).add_to(map_toronto)  
    
map_toronto

In [19]:
CLIENT_ID = 'Y3VXY10YN00C5HLQEPM3GZL4JLGZRN31YB1IAIBXYRDMORVQ' # your Foursquare ID
CLIENT_SECRET = 'D3LF5XOY4SGC0NHAVT3UWKUO55HK14GF5MVL2DXIXJKL0MST' # your Foursquare Secret
VERSION = '20181004' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: Y3VXY10YN00C5HLQEPM3GZL4JLGZRN31YB1IAIBXYRDMORVQ
CLIENT_SECRET:D3LF5XOY4SGC0NHAVT3UWKUO55HK14GF5MVL2DXIXJKL0MST


Select the first Neighbour 

In [20]:
df_t.loc[0, 'Neighbourhood']

'Rosedale'

In [21]:
neighborhood_latitude = df_t.loc[0, 'Latitude'] # Rosedale latitude value
neighborhood_longitude = df_t.loc[0, 'Longitude'] # Rosedale longitude value

neighborhood_name = df_t.loc[0, 'Neighbourhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Rosedale are 43.6795626, -79.37752940000001.


In [22]:
LIMIT = 100
radius = 3000
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=Y3VXY10YN00C5HLQEPM3GZL4JLGZRN31YB1IAIBXYRDMORVQ&client_secret=D3LF5XOY4SGC0NHAVT3UWKUO55HK14GF5MVL2DXIXJKL0MST&v=20181004&ll=43.6795626,-79.37752940000001&radius=3000&limit=100'

In [23]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5bb72763f594df1e00eac8d7'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'},
    {'name': 'With specials', 'key': 'specials'}]},
  'headerLocation': 'Toronto',
  'headerFullLocation': 'Toronto',
  'headerLocationGranularity': 'city',
  'totalResults': 236,
  'suggestedBounds': {'ne': {'lat': 43.706562627000025,
    'lng': -79.34026569646221},
   'sw': {'lat': 43.65256257299997, 'lng': -79.41479310353782}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4adcb343f964a520e32e21e3',
       'name': 'Summerhill Market',
       'location': {'address': '446 Summerhill Ave',
        'crossStreet': 'btwn. MacLennan Ave. and Glen Rd.',
        'lat': 43.68626482142425,
        '

#### Observations - There are many parks around Rosedale

In [24]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [25]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Summerhill Market,Grocery Store,43.686265,-79.375458
1,Black Camel,BBQ Joint,43.677016,-79.389367
2,Greenhouse Juice Co,Juice Bar,43.679101,-79.390686
3,LCBO,Liquor Store,43.681497,-79.391261
4,Evergreen Brick Works Farmers Market,Farmers Market,43.684282,-79.365649


Explore Neighbours

In [26]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [27]:
Toronto_venues = getNearbyVenues(names=df_t['Neighbourhood'],
                                   latitudes=df_t['Latitude'],
                                   longitudes=df_t['Longitude']
                                  )

Rosedale
Cabbagetown,St. James Town
Church and Wellesley
Harbourfront,Regent Park
Ryerson,Garden District
St. James Town
Berczy Park
Central Bay Street
Adelaide,King,Richmond
Harbourfront East,Toronto Islands,Union Station
Design Exchange,Toronto Dominion Centre
Commerce Court,Victoria Hotel
Harbord,University of Toronto
Chinatown,Grange Park,Kensington Market
CN Tower,Bathurst Quay,Island airport,Harbourfront West,King and Spadina,Railway Lands,South Niagara
Stn A PO Boxes 25 The Esplanade
First Canadian Place,Underground city
Christie


In [28]:
Toronto_venues

Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Rosedale,43.679563,-79.377529,Rosedale Park,43.682328,-79.378934,Playground
1,Rosedale,43.679563,-79.377529,Whitney Park,43.682036,-79.373788,Park
2,Rosedale,43.679563,-79.377529,Alex Murray Parkette,43.678300,-79.382773,Park
3,Rosedale,43.679563,-79.377529,Milkman's Lane,43.676352,-79.373842,Trail
4,"Cabbagetown,St. James Town",43.667967,-79.367675,Cranberries,43.667843,-79.369407,Diner
5,"Cabbagetown,St. James Town",43.667967,-79.367675,Butter Chicken Factory,43.667072,-79.369184,Indian Restaurant
6,"Cabbagetown,St. James Town",43.667967,-79.367675,F'Amelia,43.667536,-79.368613,Italian Restaurant
7,"Cabbagetown,St. James Town",43.667967,-79.367675,Kingyo Toronto,43.665895,-79.368415,Japanese Restaurant
8,"Cabbagetown,St. James Town",43.667967,-79.367675,Rashnaa Restaurant,43.668183,-79.369066,Indian Restaurant
9,"Cabbagetown,St. James Town",43.667967,-79.367675,Merryberry Cafe + Bistro,43.666630,-79.368792,Café


In [29]:
Toronto_onehot = pd.get_dummies(Toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
Toronto_onehot['Neighbourhood'] = Toronto_venues['Neighbourhood'] 

# move neighborhood column to the first column
fixed_columns = [Toronto_onehot.columns[-1]] + list(Toronto_onehot.columns[:-1])
Toronto_onehot = Toronto_onehot[fixed_columns]

Toronto_onehot.head()

Unnamed: 0,Neighbourhood,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store,Yoga Studio
0,Rosedale,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Rosedale,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Rosedale,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Rosedale,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
4,"Cabbagetown,St. James Town",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [30]:
Toronto_grouped = Toronto_onehot.groupby('Neighbourhood').mean().reset_index()
Toronto_grouped

Unnamed: 0,Neighbourhood,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store,Yoga Studio
0,"Adelaide,King,Richmond",0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,...,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.01,0.0
1,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"CN Tower,Bathurst Quay,Island airport,Harbourf...",0.0,0.0,0.0,0.076923,0.076923,0.153846,0.153846,0.153846,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Cabbagetown,St. James Town",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,...,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.011765,0.0,0.011765
5,"Chinatown,Grange Park,Kensington Market",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.01,0.01,0.0,0.0,0.06,0.0,0.04,0.01,0.0,0.0
6,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Church and Wellesley,0.0,0.011765,0.011765,0.0,0.0,0.0,0.0,0.0,0.011765,...,0.0,0.0,0.0,0.0,0.011765,0.011765,0.011765,0.0,0.0,0.011765
8,"Commerce Court,Victoria Hotel",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0
9,"Design Exchange,Toronto Dominion Centre",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,...,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0


In [31]:
num_top_venues = 50

for hood in Toronto_grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp = Toronto_grouped[Toronto_grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide,King,Richmond----
                         venue  freq
0                  Coffee Shop  0.07
1                         Café  0.06
2          American Restaurant  0.04
3                   Steakhouse  0.04
4              Thai Restaurant  0.04
5                   Restaurant  0.03
6                        Hotel  0.03
7                          Gym  0.03
8                          Bar  0.03
9               Cosmetics Shop  0.02
10                      Bakery  0.02
11                Burger Joint  0.02
12              Breakfast Spot  0.02
13                Concert Hall  0.02
14            Sushi Restaurant  0.02
15                   Gastropub  0.02
16               Deli / Bodega  0.02
17              Clothing Store  0.02
18         Japanese Restaurant  0.02
19            Asian Restaurant  0.02
20         Monument / Landmark  0.01
21    Mediterranean Restaurant  0.01
22          Salon / Barbershop  0.01
23                 Salad Place  0.01
24          Italian Restaurant  0.01
25     

49                      Nightclub  0.01


----Commerce Court,Victoria Hotel----
                      venue  freq
0               Coffee Shop  0.14
1                      Café  0.06
2                     Hotel  0.06
3                Restaurant  0.05
4       American Restaurant  0.04
5                       Gym  0.03
6                Steakhouse  0.03
7        Seafood Restaurant  0.03
8                 Gastropub  0.03
9             Deli / Bodega  0.03
10       Italian Restaurant  0.03
11                      Bar  0.02
12             Cocktail Bar  0.02
13          Thai Restaurant  0.02
14                 Tea Room  0.02
15                   Bakery  0.02
16     Gym / Fitness Center  0.01
17         Department Store  0.01
18                      Pub  0.01
19                 Creperie  0.01
20             Concert Hall  0.01
21              Salad Place  0.01
22                     Park  0.01
23           Sandwich Place  0.01
24      Japanese Restaurant  0.01
25                   Church  0.01
26

                              venue  freq
0                       Coffee Shop  0.07
1                              Café  0.06
2                        Restaurant  0.05
3                    Clothing Store  0.04
4                             Hotel  0.04
5                      Cocktail Bar  0.03
6               Japanese Restaurant  0.03
7                Italian Restaurant  0.03
8                         Gastropub  0.03
9                    Cosmetics Shop  0.03
10                         Beer Bar  0.02
11                   Farmers Market  0.02
12                   Breakfast Spot  0.02
13                           Bakery  0.02
14                            Diner  0.02
15                  Thai Restaurant  0.02
16                         Creperie  0.02
17              American Restaurant  0.02
18               Seafood Restaurant  0.02
19                             Park  0.02
20                              Pub  0.01
21                      Fish Market  0.01
22                Electronics Stor

In [32]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [33]:
num_top_venues = 50

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighbourhoods_venues_sorted = pd.DataFrame(columns=columns)
neighbourhoods_venues_sorted['Neighbourhood'] = Toronto_grouped['Neighbourhood']

for ind in np.arange(Toronto_grouped.shape[0]):
    neighbourhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Toronto_grouped.iloc[ind, :], num_top_venues)

neighbourhoods_venues_sorted

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,...,41th Most Common Venue,42th Most Common Venue,43th Most Common Venue,44th Most Common Venue,45th Most Common Venue,46th Most Common Venue,47th Most Common Venue,48th Most Common Venue,49th Most Common Venue,50th Most Common Venue
0,"Adelaide,King,Richmond",Coffee Shop,Café,American Restaurant,Steakhouse,Thai Restaurant,Gym,Bar,Restaurant,Hotel,...,Latin American Restaurant,Plaza,Accessories Store,Speakeasy,Seafood Restaurant,Wine Bar,Vegetarian / Vegan Restaurant,Furniture / Home Store,Theater,General Travel
1,Berczy Park,Coffee Shop,Cocktail Bar,Beer Bar,Seafood Restaurant,Farmers Market,Bakery,Steakhouse,Restaurant,Cheese Shop,...,Hotel,Bagel Shop,Electronics Store,Fast Food Restaurant,Dance Studio,Gay Bar,Gastropub,Falafel Restaurant,Gaming Cafe,Deli / Bodega
2,"CN Tower,Bathurst Quay,Island airport,Harbourf...",Airport Lounge,Airport Service,Airport Terminal,Boat or Ferry,Plane,Boutique,Sculpture Garden,Airport,Airport Food Court,...,German Restaurant,General Travel,General Entertainment,Gay Bar,Gastropub,Gaming Cafe,Furniture / Home Store,Fried Chicken Joint,French Restaurant,Fountain
3,"Cabbagetown,St. James Town",Coffee Shop,Restaurant,Café,Pizza Place,Bakery,Pub,Italian Restaurant,Indian Restaurant,Pharmacy,...,French Restaurant,Department Store,General Travel,Dance Studio,Creperie,Cosmetics Shop,Fried Chicken Joint,Food Truck,Fountain,Farmers Market
4,Central Bay Street,Coffee Shop,Café,Sandwich Place,Italian Restaurant,Ice Cream Shop,Bar,Bubble Tea Shop,Burger Joint,Japanese Restaurant,...,Restaurant,Vegetarian / Vegan Restaurant,Smoothie Shop,Steakhouse,Thai Restaurant,Wine Bar,Seafood Restaurant,Tea Room,Sushi Restaurant,Record Shop
5,"Chinatown,Grange Park,Kensington Market",Café,Vegetarian / Vegan Restaurant,Bar,Chinese Restaurant,Vietnamese Restaurant,Bakery,Mexican Restaurant,Coffee Shop,Dumpling Restaurant,...,Jazz Club,Cheese Shop,Arepa Restaurant,Taco Place,Smoke Shop,Bagel Shop,Tea Room,Snack Place,Thrift / Vintage Store,Toy / Game Store
6,Christie,Grocery Store,Café,Park,Nightclub,Italian Restaurant,Restaurant,Diner,Baby Store,Convenience Store,...,Gaming Cafe,Furniture / Home Store,Fried Chicken Joint,French Restaurant,Fountain,Food Truck,Food Court,Food & Drink Shop,Flower Shop,Fish Market
7,Church and Wellesley,Japanese Restaurant,Coffee Shop,Sushi Restaurant,Gay Bar,Restaurant,Burger Joint,Bubble Tea Shop,Café,Men's Store,...,Yoga Studio,Nightclub,Food & Drink Shop,Sculpture Garden,Sports Bar,American Restaurant,Arts & Crafts Store,Tea Room,Theater,Smoke Shop
8,"Commerce Court,Victoria Hotel",Coffee Shop,Hotel,Café,Restaurant,American Restaurant,Seafood Restaurant,Italian Restaurant,Steakhouse,Gym,...,Fountain,Food Truck,General Travel,Food Court,Gluten-free Restaurant,Greek Restaurant,Tailor Shop,Gym / Fitness Center,Sporting Goods Shop,Concert Hall
9,"Design Exchange,Toronto Dominion Centre",Coffee Shop,Hotel,Café,American Restaurant,Deli / Bodega,Gastropub,Restaurant,Gym,Sports Bar,...,Tea Room,Thai Restaurant,Theater,Sporting Goods Shop,Speakeasy,Train Station,Asian Restaurant,Wine Bar,Shopping Mall,Flower Shop


In [35]:
from sklearn.cluster import KMeans
# set number of clusters
kclusters = 10

Toronto_grouped_clustering = Toronto_grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([6, 5, 2, 9, 4, 8, 3, 4, 6, 6])

In [36]:
Toronto_merged = df_t

# add clustering labels
Toronto_merged['Cluster Labels'] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
Toronto_merged = Toronto_merged.join(neighbourhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

Toronto_merged.head() # check the last columns!

Unnamed: 0,Postcode,Borough,Neighbourhood,Postal Code,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,...,41th Most Common Venue,42th Most Common Venue,43th Most Common Venue,44th Most Common Venue,45th Most Common Venue,46th Most Common Venue,47th Most Common Venue,48th Most Common Venue,49th Most Common Venue,50th Most Common Venue
0,M4W,Downtown Toronto,Rosedale,M4W,43.679563,-79.377529,6,Park,Playground,Trail,...,Fast Food Restaurant,Fried Chicken Joint,French Restaurant,Fountain,Food Truck,Food Court,Food & Drink Shop,Flower Shop,Fish Market,Fish & Chips Shop
1,M4X,Downtown Toronto,"Cabbagetown,St. James Town",M4X,43.667967,-79.367675,5,Coffee Shop,Restaurant,Café,...,French Restaurant,Department Store,General Travel,Dance Studio,Creperie,Cosmetics Shop,Fried Chicken Joint,Food Truck,Fountain,Farmers Market
2,M4Y,Downtown Toronto,Church and Wellesley,M4Y,43.66586,-79.38316,2,Japanese Restaurant,Coffee Shop,Sushi Restaurant,...,Yoga Studio,Nightclub,Food & Drink Shop,Sculpture Garden,Sports Bar,American Restaurant,Arts & Crafts Store,Tea Room,Theater,Smoke Shop
3,M5A,Downtown Toronto,"Harbourfront,Regent Park",M5A,43.65426,-79.360636,9,Coffee Shop,Park,Bakery,...,Donut Shop,Doner Restaurant,Dog Run,Discount Store,Diner,Arepa Restaurant,Dim Sum Restaurant,Fish Market,Basketball Stadium,Department Store
4,M5B,Downtown Toronto,"Ryerson,Garden District",M5B,43.657162,-79.378937,4,Coffee Shop,Clothing Store,Café,...,Burger Joint,Gym,Spa,Wine Bar,Vietnamese Restaurant,Vegetarian / Vegan Restaurant,Toy / Game Store,Furniture / Home Store,Thai Restaurant,Tanning Salon


In [37]:
import matplotlib.cm as cm
import matplotlib.colors as colors
# create map
map_clusters = folium.Map(location=[43.679563, -79.377529], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Toronto_merged['Latitude'], Toronto_merged['Longitude'], Toronto_merged['Neighbourhood'], Toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [38]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 0, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,...,41th Most Common Venue,42th Most Common Venue,43th Most Common Venue,44th Most Common Venue,45th Most Common Venue,46th Most Common Venue,47th Most Common Venue,48th Most Common Venue,49th Most Common Venue,50th Most Common Venue
12,Downtown Toronto,-79.400049,0,Café,Bar,Japanese Restaurant,Coffee Shop,Bookstore,Bakery,Restaurant,...,German Restaurant,Convenience Store,Dog Run,Fish & Chips Shop,Filipino Restaurant,Concert Hall,Fish Market,Fast Food Restaurant,Flower Shop,Food & Drink Shop
13,Downtown Toronto,-79.400049,0,Café,Vegetarian / Vegan Restaurant,Bar,Chinese Restaurant,Vietnamese Restaurant,Bakery,Mexican Restaurant,...,Jazz Club,Cheese Shop,Arepa Restaurant,Taco Place,Smoke Shop,Bagel Shop,Tea Room,Snack Place,Thrift / Vintage Store,Toy / Game Store
