In [1]:
import scipy as sp
import numpy as np
import folium
import pandas as pd
import matplotlib.pyplot as plt
import requests
from bs4 import BeautifulSoup
%matplotlib inline

We will be working with London first. We will need to scrape the data from the wikipedia page containing the postal codes.

In [2]:
url = 'https://en.wikipedia.org/wiki/List_of_areas_of_London'
response = requests.get(url)
soup = BeautifulSoup(response.text)

table = [
    [
        [td.get_text(strip=True) for td in tr.find_all('td')] for tr in table.find_all('tr')
    ] 
    for table in soup.find_all('table')
]
table

[[['Map all coordinates in "Category:Areas of London" using:OpenStreetMap'],
  ['Download coordinates as:KML·GPX']],
 [[],
  ['Abbey Wood', 'Bexley,  Greenwich[2]', 'LONDON', 'SE2', '020', 'TQ465785'],
  ['Acton',
   'Ealing, Hammersmith and Fulham[3]',
   'LONDON',
   'W3, W4',
   '020',
   'TQ205805'],
  ['Addington', 'Croydon[3]', 'CROYDON', 'CR0', '020', 'TQ375645'],
  ['Addiscombe', 'Croydon[3]', 'CROYDON', 'CR0', '020', 'TQ345665'],
  ['Albany Park', 'Bexley', 'BEXLEY, SIDCUP', 'DA5, DA14', '020', 'TQ478728'],
  ['Aldborough Hatch', 'Redbridge[4]', 'ILFORD', 'IG2', '020', 'TQ455895'],
  ['Aldgate', 'City[5]', 'LONDON', 'EC3', '020', 'TQ334813'],
  ['Aldwych', 'Westminster[5]', 'LONDON', 'WC2', '020', 'TQ307810'],
  ['Alperton', 'Brent[6]', 'WEMBLEY', 'HA0', '020', 'TQ185835'],
  ['Anerley', 'Bromley[6]', 'LONDON', 'SE20', '020', 'TQ345695'],
  ['Angel', 'Islington[3]', 'LONDON', 'EC1, N1', '020', 'TQ345665'],
  ['Aperfield', 'Bromley[6]', 'WESTERHAM', 'TN16', '01959', 'TQ425585']

In [3]:
table[1][533]

location = []
borough = []
town = []
postcode = []

for i in range(1,534):
    location_to_add = table[1][i][0]
    borough_to_add = table[1][i][1]
    town_to_add = table[1][i][2]
    postcode_to_add = table[1][i][3]
    
    location.append(location_to_add)
    borough.append(borough_to_add)
    town.append(town_to_add)
    postcode.append(postcode_to_add)

In [4]:
data = list(zip(location, borough, town, postcode))
df_wiki = pd.DataFrame(data, columns=['Location', 'Borough', 'Town', 'Postcode'])
df_wiki

Unnamed: 0,Location,Borough,Town,Postcode
0,Abbey Wood,"Bexley, Greenwich[2]",LONDON,SE2
1,Acton,"Ealing, Hammersmith and Fulham[3]",LONDON,"W3, W4"
2,Addington,Croydon[3],CROYDON,CR0
3,Addiscombe,Croydon[3],CROYDON,CR0
4,Albany Park,Bexley,"BEXLEY, SIDCUP","DA5, DA14"
...,...,...,...,...
528,Woolwich,Greenwich,LONDON,SE18
529,Worcester Park,"Sutton, Kingston upon Thames",WORCESTER PARK,KT4
530,Wormwood Scrubs,Hammersmith and Fulham,LONDON,W12
531,Yeading,Hillingdon,HAYES,UB4


We will not be able to look at the whole city of London so we will only look at the towns listed as London

In [38]:
df = df_wiki.loc[df_wiki['Town'] == 'LONDON']
df

Unnamed: 0,Location,Borough,Town,Postcode
0,Abbey Wood,"Bexley, Greenwich[2]",LONDON,SE2
1,Acton,"Ealing, Hammersmith and Fulham[3]",LONDON,"W3, W4"
6,Aldgate,City[5],LONDON,EC3
7,Aldwych,Westminster[5],LONDON,WC2
9,Anerley,Bromley[6],LONDON,SE20
...,...,...,...,...
522,Wood Green,Haringey,LONDON,N22
523,Woodford,Redbridge,LONDON,"IG8, E18"
527,Woodside Park,Barnet,LONDON,N12
528,Woolwich,Greenwich,LONDON,SE18


In [39]:
df = df.groupby(['Postcode']).agg(lambda x: ','.join(set(x))).reset_index()
df

Unnamed: 0,Postcode,Location,Borough,Town
0,DA5,Dartford,Dartford,LONDON
1,E1,"Ratcliff,Shadwell,Wapping,Whitechapel,Mile End...",Tower Hamlets,LONDON
2,E10,Lea Bridge,Hackney,LONDON
3,"E10, E15",Leyton,Waltham Forest,LONDON
4,E11,"Leytonstone,Cann Hall,Snaresbrook,Wanstead","Waltham Forest,Redbridge, Waltham Forest,Walth...",LONDON
...,...,...,...,...
146,W9,Maida Vale,Westminster,LONDON
147,"W9, W2",Little Venice,Westminster,LONDON
148,WC1,"Bloomsbury,King's Cross,St Pancras","Camden and Islington,Camden[24],Camden",LONDON
149,"WC1, WC2",Holborn,Camden,LONDON


We cannot have 2 postcodes when looking for their latitude and longitude so we will select only the first one that appears on each row.

In [40]:
df['Postcode'][3]
for i in range(len(df['Postcode'])):
    postcode = df['Postcode'][i]
    if ',' in postcode:
        index = postcode.find(',')
        df['Postcode'][i] = postcode[:index]
        
df

Unnamed: 0,Postcode,Location,Borough,Town
0,DA5,Dartford,Dartford,LONDON
1,E1,"Ratcliff,Shadwell,Wapping,Whitechapel,Mile End...",Tower Hamlets,LONDON
2,E10,Lea Bridge,Hackney,LONDON
3,E10,Leyton,Waltham Forest,LONDON
4,E11,"Leytonstone,Cann Hall,Snaresbrook,Wanstead","Waltham Forest,Redbridge, Waltham Forest,Walth...",LONDON
...,...,...,...,...
146,W9,Maida Vale,Westminster,LONDON
147,W9,Little Venice,Westminster,LONDON
148,WC1,"Bloomsbury,King's Cross,St Pancras","Camden and Islington,Camden[24],Camden",LONDON
149,WC1,Holborn,Camden,LONDON


In [41]:
df = df.drop(columns=['Town'])
df

Unnamed: 0,Postcode,Location,Borough
0,DA5,Dartford,Dartford
1,E1,"Ratcliff,Shadwell,Wapping,Whitechapel,Mile End...",Tower Hamlets
2,E10,Lea Bridge,Hackney
3,E10,Leyton,Waltham Forest
4,E11,"Leytonstone,Cann Hall,Snaresbrook,Wanstead","Waltham Forest,Redbridge, Waltham Forest,Walth..."
...,...,...,...
146,W9,Maida Vale,Westminster
147,W9,Little Venice,Westminster
148,WC1,"Bloomsbury,King's Cross,St Pancras","Camden and Islington,Camden[24],Camden"
149,WC1,Holborn,Camden


Lets find the latitude and longitude associated with each postcode.

In [42]:
import pgeocode

latitude = []
longitude = []

for postcode in df['Postcode']:
    nom = pgeocode.Nominatim('gb')
    lat = nom.query_postal_code(postcode).latitude
    long = nom.query_postal_code(postcode).longitude
    
    latitude.append(lat)
    longitude.append(long)
    
df['Latitude'] = latitude
df['Longitude'] = longitude
df

Unnamed: 0,Postcode,Location,Borough,Latitude,Longitude
0,DA5,Dartford,Dartford,51.4406,0.144600
1,E1,"Ratcliff,Shadwell,Wapping,Whitechapel,Mile End...",Tower Hamlets,51.5000,-0.061773
2,E10,Lea Bridge,Hackney,51.5596,-0.007800
3,E10,Leyton,Waltham Forest,51.5596,-0.007800
4,E11,"Leytonstone,Cann Hall,Snaresbrook,Wanstead","Waltham Forest,Redbridge, Waltham Forest,Walth...",51.5843,0.017167
...,...,...,...,...,...
146,W9,Maida Vale,Westminster,51.5274,-0.184580
147,W9,Little Venice,Westminster,51.5274,-0.184580
148,WC1,"Bloomsbury,King's Cross,St Pancras","Camden and Islington,Camden[24],Camden",51.5175,-0.122300
149,WC1,Holborn,Camden,51.5175,-0.122300


Now we will use the Foursquare API to get the top 50 venues associated with each location.

In [43]:
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans

CLIENT_ID = 'PQ1GADCOQYNFOSAHZ2333DRY0ZD3P20LDMT1EUQCRXW3GE1J'
CLIENT_SECRET = 'key'
VERSION = '20180605'

LIMIT = 50
radius = 500

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        try:
            results = requests.get(url).json()["response"]['groups'][0]['items']
        except:
            results = []
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Borough', 
                  'Borough Latitude', 
                  'Borough Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

all_london_venues = getNearbyVenues(names=df['Borough'],
                            latitudes=df['Latitude'],
                            longitudes=df['Longitude'])

all_london_venues

Dartford
Tower Hamlets
Hackney
Waltham Forest
Waltham Forest,Redbridge, Waltham Forest,Waltham Forest[36],Redbridge
Newham
Newham
Newham
Tower Hamlets[35],Tower Hamlets,Tower Hamlets[23]
Newham
Newham,Newham[36]
Waltham Forest
Redbridge
Tower Hamlets[35],Tower Hamlets[20],Hackney
Tower Hamlets,Tower Hamlets[31],Tower Hamlets[26]
Waltham Forest
Hackney
Hackney
Newham
Newham
Newham
Hackney
Hackney
Hackney
Islington,Islington & City,City[9]
Islington[3]
City[5],Tower Hamlets
City[22]
City, Westminster
Redbridge
Islington[12],Islington,Islington[36],Hackney
Haringey,Haringey and Barnet
Barnet[32],Barnet
Barnet
Enfield[7]
Haringey[26]
Barnet
Enfield
Enfield,Barnet
Haringey
Hackney
Haringey
Hackney
Islington[7],Islington
Barnet
Barnet
Barnet
Enfield
Haringey[27],Haringey
Barnet
Haringey,Hackney,Haringey, Islington
Haringey
Islington
Camden
Islington
Islington
Haringey
Enfield
Camden[35],Camden
Camden
Brent,Brent, Ealing,Hammersmith and Fulham
Brent
Barnet
Brent,Barnet,Barnet, Brent, Camden
B

Unnamed: 0,Borough,Borough Latitude,Borough Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Dartford,51.4406,0.144600,Ferraris,51.441699,0.146755,Italian Restaurant
1,Dartford,51.4406,0.144600,King's Head,51.441730,0.149785,Pub
2,Dartford,51.4406,0.144600,Old Bexley Greek Taverna,51.441405,0.150640,Greek Restaurant
3,Dartford,51.4406,0.144600,The George,51.441756,0.150429,Pub
4,Dartford,51.4406,0.144600,Nisa Local,51.441233,0.148158,Supermarket
...,...,...,...,...,...,...,...
4003,"Westminster,Westminster[5],Camden",51.5142,-0.123382,Masala Zone,51.513253,-0.122922,Indian Restaurant
4004,"Westminster,Westminster[5],Camden",51.5142,-0.123382,Augustus Harris,51.512755,-0.120701,Wine Bar
4005,"Westminster,Westminster[5],Camden",51.5142,-0.123382,Allbirds,51.512652,-0.125187,Shoe Store
4006,"Westminster,Westminster[5],Camden",51.5142,-0.123382,Udderlicious,51.513720,-0.127411,Ice Cream Shop


<h5>Now we will be working on finding the most frequent categories that appear within each borough</h5>

In [76]:
london_onehot = pd.get_dummies(all_london_venues[['Venue Category']], prefix="", prefix_sep="")


london_onehot['Borough'] = all_london_venues['Borough'] 

fixed_columns = [london_onehot.columns[-1]] + list(london_onehot.columns[:-1])
london_onehot = london_onehot[fixed_columns]

london_grouped = london_onehot.groupby('Borough').mean().reset_index()
london_grouped

Unnamed: 0,Borough,Accessories Store,African Restaurant,American Restaurant,Antique Shop,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,...,Video Game Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Xinjiang Restaurant,Yoga Studio
0,Barnet,0.0,0.0,0.00,0.0,0.0,0.00,0.00,0.00,0.00,...,0.0,0.008197,0.0,0.0,0.000000,0.016393,0.0,0.0,0.0,0.008197
1,"Barnet[32],Barnet",0.0,0.0,0.00,0.0,0.0,0.00,0.00,0.00,0.00,...,0.0,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.000000
2,"Barnet[34],Barnet",0.0,0.0,0.00,0.0,0.0,0.00,0.00,0.00,0.00,...,0.0,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.000000
3,"Bexley, Greenwich[2],Bexley",0.0,0.0,0.00,0.0,0.0,0.00,0.00,0.00,0.00,...,0.0,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.000000
4,Brent,0.0,0.0,0.00,0.0,0.0,0.00,0.00,0.00,0.00,...,0.0,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.076923
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
74,Westminster,0.0,0.0,0.00,0.0,0.0,0.00,0.00,0.00,0.00,...,0.0,0.000000,0.0,0.0,0.016667,0.016667,0.0,0.0,0.0,0.016667
75,"Westminster,Camden",0.0,0.0,0.02,0.0,0.0,0.02,0.06,0.00,0.02,...,0.0,0.040000,0.0,0.0,0.020000,0.000000,0.0,0.0,0.0,0.000000
76,"Westminster,Westminster[5],Camden",0.0,0.0,0.00,0.0,0.0,0.00,0.00,0.02,0.00,...,0.0,0.000000,0.0,0.0,0.020000,0.000000,0.0,0.0,0.0,0.000000
77,"Westminster[14],Westminster",0.0,0.0,0.00,0.0,0.0,0.00,0.00,0.00,0.00,...,0.0,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.000000


In [77]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

In [87]:
columns = ['Borough']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))


boroughs_venues_sorted = pd.DataFrame(columns=columns)
boroughs_venues_sorted['Borough'] = london_grouped['Borough']

for ind in np.arange(london_grouped.shape[0]):
    boroughs_venues_sorted.iloc[ind, 1:] = return_most_common_venues(london_grouped.iloc[ind, :], num_top_venues)
    
to_combine_london = boroughs_venues_sorted
boroughs_venues_sorted.head()

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Barnet,Café,Grocery Store,Pub,Coffee Shop,Bakery,Italian Restaurant,Gym / Fitness Center,Turkish Restaurant,Park,Deli / Bodega
1,"Barnet[32],Barnet",Grocery Store,Sporting Goods Shop,Fast Food Restaurant,Toy / Game Store,Gym,Coffee Shop,Furniture / Home Store,Hardware Store,Residential Building (Apartment / Condo),Pet Store
2,"Barnet[34],Barnet",Athletics & Sports,Yoga Studio,Ethiopian Restaurant,Exhibit,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Filipino Restaurant,Film Studio
3,"Bexley, Greenwich[2],Bexley",Indian Restaurant,Garden Center,Chinese Restaurant,Grocery Store,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop,Food & Drink Shop,Food Truck
4,Brent,Bus Stop,Cosmetics Shop,Food & Drink Shop,Music Store,Gym / Fitness Center,Bakery,Restaurant,Yoga Studio,Grocery Store,Deli / Bodega


<h3>Now that we have the most common venue categories within each borough, we can start clustering them together using k means clustering.</h3>

In [79]:
kclusters = 5

london_grouped_clustering = london_grouped.drop('Borough', 1)

kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(london_grouped_clustering)

boroughs_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

london_merged = df

london_merged = london_merged.join(boroughs_venues_sorted.set_index('Borough'), on='Borough')
london_merged

Unnamed: 0,Postcode,Location,Borough,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,DA5,Dartford,Dartford,51.4406,0.144600,0,Pub,Fast Food Restaurant,Greek Restaurant,Supermarket,Indian Restaurant,Chinese Restaurant,Italian Restaurant,Breakfast Spot,Train Station,Exhibit
1,E1,"Ratcliff,Shadwell,Wapping,Whitechapel,Mile End...",Tower Hamlets,51.5000,-0.061773,2,Pub,Park,Bus Stop,Fried Chicken Joint,Theater,Brazilian Restaurant,Breakfast Spot,Grocery Store,History Museum,Metro Station
2,E10,Lea Bridge,Hackney,51.5596,-0.007800,2,Café,Pub,Italian Restaurant,Grocery Store,Pizza Place,Park,Restaurant,Coffee Shop,Breakfast Spot,Gym / Fitness Center
3,E10,Leyton,Waltham Forest,51.5596,-0.007800,0,Café,Pub,Coffee Shop,Grocery Store,Fast Food Restaurant,Sandwich Place,Bakery,Restaurant,Clothing Store,Pizza Place
4,E11,"Leytonstone,Cann Hall,Snaresbrook,Wanstead","Waltham Forest,Redbridge, Waltham Forest,Walth...",51.5843,0.017167,2,English Restaurant,Restaurant,Chinese Restaurant,Tea Room,Lake,Bus Stop,Hotel,Dim Sum Restaurant,Falafel Restaurant,Food Court
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
146,W9,Maida Vale,Westminster,51.5274,-0.184580,2,Café,Pub,Grocery Store,Thai Restaurant,Coffee Shop,Deli / Bodega,Italian Restaurant,Japanese Restaurant,French Restaurant,Bakery
147,W9,Little Venice,Westminster,51.5274,-0.184580,2,Café,Pub,Grocery Store,Thai Restaurant,Coffee Shop,Deli / Bodega,Italian Restaurant,Japanese Restaurant,French Restaurant,Bakery
148,WC1,"Bloomsbury,King's Cross,St Pancras","Camden and Islington,Camden[24],Camden",51.5175,-0.122300,2,Coffee Shop,Pub,Burger Joint,Hotel,Café,Restaurant,History Museum,Theater,Donut Shop,Seafood Restaurant
149,WC1,Holborn,Camden,51.5175,-0.122300,2,Pub,Coffee Shop,French Restaurant,Cocktail Bar,Pizza Place,Gym / Fitness Center,Sandwich Place,Italian Restaurant,Café,Grocery Store


It seems there was a postcode that the latitude and longitude could not be found for so we will omit this district.

In [62]:
print(london_merged[london_merged['Latitude'].isnull()])
print(london_merged[london_merged['Cluster Labels'].isnull()])
london_merged = london_merged[london_merged['Postcode'] != 'N9']
london_merged

   Postcode  Location  Borough  Latitude  Longitude  Cluster Labels  \
57       N9  Edmonton  Enfield       NaN        NaN               0   

   1st Most Common Venue 2nd Most Common Venue 3rd Most Common Venue  \
57           Coffee Shop    Italian Restaurant         Grocery Store   

   4th Most Common Venue 5th Most Common Venue 6th Most Common Venue  \
57           Supermarket                   Pub                   Bar   

   7th Most Common Venue 8th Most Common Venue 9th Most Common Venue  \
57     Indian Restaurant           Pizza Place                  Café   

   10th Most Common Venue  
57   Fast Food Restaurant  
Empty DataFrame
Columns: [Postcode, Location, Borough, Latitude, Longitude, Cluster Labels, 1st Most Common Venue, 2nd Most Common Venue, 3rd Most Common Venue, 4th Most Common Venue, 5th Most Common Venue, 6th Most Common Venue, 7th Most Common Venue, 8th Most Common Venue, 9th Most Common Venue, 10th Most Common Venue]
Index: []


Unnamed: 0,Postcode,Location,Borough,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,DA5,Dartford,Dartford,51.4406,0.144600,0,Pub,Fast Food Restaurant,Greek Restaurant,Supermarket,Indian Restaurant,Chinese Restaurant,Italian Restaurant,Breakfast Spot,Train Station,Exhibit
1,E1,"Ratcliff,Shadwell,Wapping,Whitechapel,Mile End...",Tower Hamlets,51.5000,-0.061773,2,Pub,Park,Bus Stop,Fried Chicken Joint,Theater,Brazilian Restaurant,Breakfast Spot,Grocery Store,History Museum,Metro Station
2,E10,Lea Bridge,Hackney,51.5596,-0.007800,2,Café,Pub,Italian Restaurant,Grocery Store,Pizza Place,Park,Restaurant,Coffee Shop,Breakfast Spot,Gym / Fitness Center
3,E10,Leyton,Waltham Forest,51.5596,-0.007800,0,Café,Pub,Coffee Shop,Grocery Store,Fast Food Restaurant,Sandwich Place,Bakery,Restaurant,Clothing Store,Pizza Place
4,E11,"Leytonstone,Cann Hall,Snaresbrook,Wanstead","Waltham Forest,Redbridge, Waltham Forest,Walth...",51.5843,0.017167,2,English Restaurant,Restaurant,Chinese Restaurant,Tea Room,Lake,Bus Stop,Hotel,Dim Sum Restaurant,Falafel Restaurant,Food Court
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
146,W9,Maida Vale,Westminster,51.5274,-0.184580,2,Café,Pub,Grocery Store,Thai Restaurant,Coffee Shop,Deli / Bodega,Italian Restaurant,Japanese Restaurant,French Restaurant,Bakery
147,W9,Little Venice,Westminster,51.5274,-0.184580,2,Café,Pub,Grocery Store,Thai Restaurant,Coffee Shop,Deli / Bodega,Italian Restaurant,Japanese Restaurant,French Restaurant,Bakery
148,WC1,"Bloomsbury,King's Cross,St Pancras","Camden and Islington,Camden[24],Camden",51.5175,-0.122300,2,Coffee Shop,Pub,Burger Joint,Hotel,Café,Restaurant,History Museum,Theater,Donut Shop,Seafood Restaurant
149,WC1,Holborn,Camden,51.5175,-0.122300,2,Pub,Coffee Shop,French Restaurant,Cocktail Bar,Pizza Place,Gym / Fitness Center,Sandwich Place,Italian Restaurant,Café,Grocery Store


In [63]:
london_lat = 51.5074
london_long = 0.1278

map_clusters = folium.Map(location=[london_lat, london_long], zoom_start=10)

x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

markers_colors = []
for lat, lon, poi, cluster in zip(london_merged['Latitude'], london_merged['Longitude'], london_merged['Borough'], london_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)-1],
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

<h1><center>Toronto Distribution</center></h1>

The same process that was followed for London is now replicated until the creation of the map after which we look to combine the datasets.

In [17]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
response = requests.get(url)
response.text

'<!DOCTYPE html>\n<html class="client-nojs" lang="en" dir="ltr">\n<head>\n<meta charset="UTF-8"/>\n<title>List of postal codes of Canada: M - Wikipedia</title>\n<script>document.documentElement.className="client-js";RLCONF={"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":0,"wgPageName":"List_of_postal_codes_of_Canada:_M","wgTitle":"List of postal codes of Canada: M","wgCurRevisionId":916835432,"wgRevisionId":916835432,"wgArticleId":539066,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Communications in Ontario","Postal codes in Canada","Toronto","Ontario-related lists"],"wgBreakFrames":!1,"wgPageContentLanguage":"en","wgPageContentModel":"wikitext","wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgMonthNamesShort":["","

In [18]:
soup = BeautifulSoup(response.text)
tables = [
    [
        [td.get_text(strip=True) for td in tr.find_all('td')] for tr in table.find_all('tr')
    ] 
    for table in soup.find_all('table')
]
tables

[[[],
  ['M1A', 'Not assigned', 'Not assigned'],
  ['M2A', 'Not assigned', 'Not assigned'],
  ['M3A', 'North York', 'Parkwoods'],
  ['M4A', 'North York', 'Victoria Village'],
  ['M5A', 'Downtown Toronto', 'Harbourfront'],
  ['M5A', 'Downtown Toronto', 'Regent Park'],
  ['M6A', 'North York', 'Lawrence Heights'],
  ['M6A', 'North York', 'Lawrence Manor'],
  ['M7A', "Queen's Park", 'Not assigned'],
  ['M8A', 'Not assigned', 'Not assigned'],
  ['M9A', 'Etobicoke', 'Islington Avenue'],
  ['M1B', 'Scarborough', 'Rouge'],
  ['M1B', 'Scarborough', 'Malvern'],
  ['M2B', 'Not assigned', 'Not assigned'],
  ['M3B', 'North York', 'Don Mills North'],
  ['M4B', 'East York', 'Woodbine Gardens'],
  ['M4B', 'East York', 'Parkview Hill'],
  ['M5B', 'Downtown Toronto', 'Ryerson'],
  ['M5B', 'Downtown Toronto', 'Garden District'],
  ['M6B', 'North York', 'Glencairn'],
  ['M7B', 'Not assigned', 'Not assigned'],
  ['M8B', 'Not assigned', 'Not assigned'],
  ['M9B', 'Etobicoke', 'Cloverdale'],
  ['M9B', 'Etobi

In [19]:
postcode = []
borough = []
neighbourhood = []

for i in range(1, 289):
    postcode_to_add = tables[0][i][0]
    borough_to_add = tables[0][i][1]
    neighbourhood_to_add = tables[0][i][2]
    
    postcode.append(postcode_to_add)
    borough.append(borough_to_add)
    neighbourhood.append(neighbourhood_to_add)
    
postcode

['M1A',
 'M2A',
 'M3A',
 'M4A',
 'M5A',
 'M5A',
 'M6A',
 'M6A',
 'M7A',
 'M8A',
 'M9A',
 'M1B',
 'M1B',
 'M2B',
 'M3B',
 'M4B',
 'M4B',
 'M5B',
 'M5B',
 'M6B',
 'M7B',
 'M8B',
 'M9B',
 'M9B',
 'M9B',
 'M9B',
 'M9B',
 'M1C',
 'M1C',
 'M1C',
 'M2C',
 'M3C',
 'M3C',
 'M4C',
 'M5C',
 'M6C',
 'M7C',
 'M8C',
 'M9C',
 'M9C',
 'M9C',
 'M9C',
 'M1E',
 'M1E',
 'M1E',
 'M2E',
 'M3E',
 'M4E',
 'M5E',
 'M6E',
 'M7E',
 'M8E',
 'M9E',
 'M1G',
 'M2G',
 'M3G',
 'M4G',
 'M5G',
 'M6G',
 'M7G',
 'M8G',
 'M9G',
 'M1H',
 'M2H',
 'M3H',
 'M3H',
 'M3H',
 'M4H',
 'M5H',
 'M5H',
 'M5H',
 'M6H',
 'M6H',
 'M7H',
 'M8H',
 'M9H',
 'M1J',
 'M2J',
 'M2J',
 'M2J',
 'M3J',
 'M3J',
 'M4J',
 'M5J',
 'M5J',
 'M5J',
 'M6J',
 'M6J',
 'M7J',
 'M8J',
 'M9J',
 'M1K',
 'M1K',
 'M1K',
 'M2K',
 'M3K',
 'M3K',
 'M4K',
 'M4K',
 'M5K',
 'M5K',
 'M6K',
 'M6K',
 'M6K',
 'M7K',
 'M8K',
 'M9K',
 'M1L',
 'M1L',
 'M1L',
 'M2L',
 'M2L',
 'M3L',
 'M4L',
 'M4L',
 'M5L',
 'M5L',
 'M6L',
 'M6L',
 'M6L',
 'M7L',
 'M8L',
 'M9L',
 'M1M',
 'M1M',


In [20]:
data = list(zip(postcode, borough, neighbourhood))
df = pd.DataFrame(data, columns=['Postcode', 'Borough', 'Neighbourhood'])

In [21]:
df = df[df.Borough != 'Not assigned']
df = df.groupby(['Postcode', 'Borough'], as_index=False)['Neighbourhood'].apply(','.join)
df = df.reset_index()
df

Unnamed: 0,Postcode,Borough,0
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village,Martin Grove Gardens,Richvie..."
101,M9V,Etobicoke,"Albion Gardens,Beaumond Heights,Humbergate,Jam..."


In [22]:
df.columns = ['Postcode', 'Borough', 'Neighbourhood']
df

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village,Martin Grove Gardens,Richvie..."
101,M9V,Etobicoke,"Albion Gardens,Beaumond Heights,Humbergate,Jam..."


In [23]:
df_latlong = pd.read_csv('http://cocl.us/Geospatial_data')
df_total = df.join(df_latlong)
df_total = df_total.drop(columns=['Postal Code'])
df_total

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
...,...,...,...,...,...
98,M9N,York,Weston,43.706876,-79.518188
99,M9P,Etobicoke,Westmount,43.696319,-79.532242
100,M9R,Etobicoke,"Kingsview Village,Martin Grove Gardens,Richvie...",43.688905,-79.554724
101,M9V,Etobicoke,"Albion Gardens,Beaumond Heights,Humbergate,Jam...",43.739416,-79.588437


In [24]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Borough', 
                  'Borough Latitude', 
                  'Borough Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

all_venues = getNearbyVenues(names=df_total['Borough'],
                            latitudes=df_total['Latitude'],
                            longitudes=df_total['Longitude'])

all_venues

Scarborough
Scarborough
Scarborough
Scarborough
Scarborough
Scarborough
Scarborough
Scarborough
Scarborough
Scarborough
Scarborough
Scarborough
Scarborough
Scarborough
Scarborough
Scarborough
Scarborough
North York
North York
North York
North York
North York
North York
North York
North York
North York
North York
North York
North York
North York
North York
North York
North York
North York
North York
East York
East York
East Toronto
East York
East York
East York
East Toronto
East Toronto
East Toronto
Central Toronto
Central Toronto
Central Toronto
Central Toronto
Central Toronto
Central Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
North York
Central Toronto
Central Toronto
Central Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
North York
North York
York
York
Downtown Toronto
Wes

Unnamed: 0,Borough,Borough Latitude,Borough Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Scarborough,43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant
1,Scarborough,43.784535,-79.160497,Chris Effects Painting,43.784343,-79.163742,Construction & Landscaping
2,Scarborough,43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
3,Scarborough,43.763573,-79.188711,Swiss Chalet Rotisserie & Grill,43.767697,-79.189914,Pizza Place
4,Scarborough,43.763573,-79.188711,G & G Electronics,43.765309,-79.191537,Electronics Store
...,...,...,...,...,...,...,...
1701,Etobicoke,43.739416,-79.588437,Tim Hortons,43.742015,-79.589690,Coffee Shop
1702,Etobicoke,43.739416,-79.588437,Pizza Nova,43.736761,-79.589817,Pizza Place
1703,Etobicoke,43.706748,-79.594054,Economy Rent A Car,43.708471,-79.589943,Rental Car Location
1704,Etobicoke,43.706748,-79.594054,Logistics Distribution,43.707554,-79.589252,Bar


In [25]:
toronto_onehot = pd.get_dummies(all_venues[['Venue Category']], prefix="", prefix_sep="")
toronto_onehot

Unnamed: 0,Accessories Store,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1701,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1702,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1703,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1704,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [26]:
toronto_onehot['Borough'] = all_venues['Borough'] 

fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_grouped = toronto_onehot.groupby('Borough').mean().reset_index()

In [27]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

In [89]:
columns = ['Borough']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))


toronto_boroughs_venues_sorted = pd.DataFrame(columns=columns)
toronto_boroughs_venues_sorted['Borough'] = toronto_grouped['Borough']

for ind in np.arange(toronto_grouped.shape[0]):
    toronto_boroughs_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

    
to_combine_toronto = toronto_boroughs_venues_sorted
toronto_boroughs_venues_sorted.head()

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Central Toronto,Coffee Shop,Pizza Place,Sandwich Place,Park,Café,Restaurant,Gym,Sushi Restaurant,Dessert Shop,Pub
1,Downtown Toronto,Coffee Shop,Café,Restaurant,Bakery,Hotel,Gastropub,Park,Japanese Restaurant,Steakhouse,Italian Restaurant
2,East Toronto,Greek Restaurant,Coffee Shop,Italian Restaurant,Café,Brewery,Ice Cream Shop,Pizza Place,Park,Yoga Studio,Sandwich Place
3,East York,Coffee Shop,Burger Joint,Park,Sandwich Place,Sporting Goods Shop,Bank,Pizza Place,Pharmacy,Liquor Store,Supermarket
4,Etobicoke,Pizza Place,Sandwich Place,Pharmacy,Coffee Shop,Fast Food Restaurant,Grocery Store,Gym,Pool,Bakery,Beer Store


In [73]:
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Borough', 1)

kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

toronto_boroughs_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = df_total

toronto_merged = toronto_merged.join(toronto_boroughs_venues_sorted.set_index('Borough'), on='Borough')
toronto_grouped_clustering

Unnamed: 0,Accessories Store,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016667,0.0,0.0,...,0.0,0.008333,0.0,0.0,0.008333,0.0,0.0,0.0,0.0,0.008333
1,0.0,0.001307,0.001307,0.001307,0.002614,0.002614,0.002614,0.013072,0.0,0.002614,...,0.002614,0.011765,0.001307,0.0,0.003922,0.0,0.005229,0.001307,0.0,0.003922
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022901,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022901
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.013514,0.0,0.013514,0.0,0.0,0.0,0.013514
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014085,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014085,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.004386,0.004386,0.0,0.0,0.0,0.0,0.0,0.008772,0.0,0.0,...,0.0,0.0,0.004386,0.004386,0.008772,0.0,0.0,0.004386,0.013158,0.0
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02439,0.0,0.02439
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,...,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.006173,0.0,...,0.0,0.012346,0.0,0.0,0.012346,0.0,0.006173,0.0,0.0,0.006173


In [68]:
toronto_lat = 43.6532
toronto_long = -79.3832

map_clusters_toronto = folium.Map(location=[toronto_lat, toronto_long], zoom_start=11)

x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Borough'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters_toronto)
       
map_clusters_toronto

In [64]:
map_clusters

<h1><center>Combining Cities</center></h1>

<h3>Here, lets see if we can find similar neighbourhoods within both cities and compare how they are distributed.</h3>

In [90]:
kclusters = 5

combined_df = london_grouped.append(toronto_grouped, ignore_index=True)

combined_df_clustering = combined_df.drop('Borough', 1)

combined_boroughs_venues_sorted = to_combine_toronto.append(to_combine_london, ignore_index=True)
combined_df_clustering = combined_df_clustering.fillna(0)

kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(combined_df_clustering)
combined_boroughs_venues_sorted
combined_boroughs_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


In [94]:
df = df.rename({'Location': 'Neighbourhood'}, axis=1)
df = df[['Postcode', 'Borough', 'Neighbourhood', 'Latitude', 'Longitude']]
both_merged = df_total.append(df, ignore_index=True)
both_merged = both_merged.dropna()

both_merged = both_merged.join(combined_boroughs_venues_sorted.set_index('Borough'), on='Borough')
both_merged

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353,3,Fast Food Restaurant,Coffee Shop,Chinese Restaurant,Breakfast Spot,Bakery,Pizza Place,Intersection,Skating Rink,Rental Car Location,Bus Line
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497,3,Fast Food Restaurant,Coffee Shop,Chinese Restaurant,Breakfast Spot,Bakery,Pizza Place,Intersection,Skating Rink,Rental Car Location,Bus Line
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711,3,Fast Food Restaurant,Coffee Shop,Chinese Restaurant,Breakfast Spot,Bakery,Pizza Place,Intersection,Skating Rink,Rental Car Location,Bus Line
3,M1G,Scarborough,Woburn,43.770992,-79.216917,3,Fast Food Restaurant,Coffee Shop,Chinese Restaurant,Breakfast Spot,Bakery,Pizza Place,Intersection,Skating Rink,Rental Car Location,Bus Line
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,3,Fast Food Restaurant,Coffee Shop,Chinese Restaurant,Breakfast Spot,Bakery,Pizza Place,Intersection,Skating Rink,Rental Car Location,Bus Line
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
249,W9,Westminster,Maida Vale,51.527400,-0.184580,3,Café,Pub,Grocery Store,Thai Restaurant,Coffee Shop,Deli / Bodega,Italian Restaurant,Japanese Restaurant,French Restaurant,Bakery
250,W9,Westminster,Little Venice,51.527400,-0.184580,3,Café,Pub,Grocery Store,Thai Restaurant,Coffee Shop,Deli / Bodega,Italian Restaurant,Japanese Restaurant,French Restaurant,Bakery
251,WC1,"Camden and Islington,Camden[24],Camden","Bloomsbury,King's Cross,St Pancras",51.517500,-0.122300,1,Coffee Shop,Pub,Burger Joint,Hotel,Café,Restaurant,History Museum,Theater,Donut Shop,Seafood Restaurant
252,WC1,Camden,Holborn,51.517500,-0.122300,3,Pub,Coffee Shop,French Restaurant,Cocktail Bar,Pizza Place,Gym / Fitness Center,Sandwich Place,Italian Restaurant,Café,Grocery Store


In [96]:
toronto_lat = 43.6532
toronto_long = -79.3832

map_clusters_combined = folium.Map(location=[toronto_lat, toronto_long], zoom_start=3)

x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

markers_colors = []
for lat, lon, poi, cluster in zip(both_merged['Latitude'], both_merged['Longitude'], both_merged['Borough'], both_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters_combined)
       
map_clusters_combined