In [817]:
# Import the required libraries
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests
from pandas.io.json import json_normalize

In [818]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
url_text = requests.get(url).text


In [819]:
soup = BeautifulSoup(url_text, 'lxml')

In [820]:
t = soup.find('table') # Looks like there is just one table so you need not use find_all

In [821]:
headers = t.find('tr') # Use find to load the row with the headers

In [822]:
col_h = headers.find_all('th')
col_names = []
for h in col_h:
    col_names.append(h.text) # Iterate over th to get the header name

In [823]:
col_names[2] = col_names[2][:-1]  # Last header apparently containns \n.. Remove the header
col_names

['Postcode', 'Borough', 'Neighbourhood']

In [824]:
# Find all the td .. We need to pick three tds at a time . This will avoid iterating over tr 

rows = t.find_all('td')
rows[2].text  # Note the \n.. Code put in to while loop to remove this as shown below


'Not assigned\n'

In [825]:
# Iterate over the tds to get the required columns
postcode = []
borough = []
neighbourhood = []
count = 0
while count < len(rows):
    postcode.append(rows[count].text)
    count+=1
    borough.append(rows[count].text)
    count+=1
    neighbourhood.append(rows[count].text[:-1])
    count+=1

In [826]:
#Create data frame with the three columns
data_dict = {'Postcode':postcode,
             'Borough':borough,
             'Neighbourhood':neighbourhood}
# Note that mentioning the columns will ensure that column order is maintained
df = pd.DataFrame(data_dict, columns=['Postcode','Borough','Neighbourhood'])


In [827]:
df.shape

(289, 3)

In [828]:
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [829]:
df = df[df['Borough']!='Not assigned'] # Removing all the entries where Borough is 'Not Assigned'

In [830]:
# Use numpy to replace Neighbourbood mentioned as  Not assigned with Borough
df["Neighbourhood"] = np.where(df["Neighbourhood"]=='Not assigned', df["Borough"],df["Neighbourhood"])
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights


In [831]:
df.shape

(212, 3)

In [832]:
#  Adding semicolumn in the end of neighbourhood to have the neighbourhoods comma seperated while consolidating

df["Neighbourhood"] = df["Neighbourhood"] +','

In [833]:
# Verifying the total unique postcodes. This will be verifying against the csv file with the post code
len(set(df['Postcode']))

103

In [834]:
# Run a group by based on post code and borough and sum it, which will concatenate the neighbourhoods 
df = df.groupby(['Postcode','Borough']).sum()
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Neighbourhood
Postcode,Borough,Unnamed: 2_level_1
M1B,Scarborough,"Rouge,Malvern,"
M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union,"
M1E,Scarborough,"Guildwood,Morningside,West Hill,"
M1G,Scarborough,"Woburn,"
M1H,Scarborough,"Cedarbrae,"


In [835]:
df = df.reset_index() # Flatten the dataframe

In [836]:
# Remove the semi colon from the neighbourhood
df['Neighbourhood'] = df['Neighbourhood'].apply(lambda x: x[0:-1])

In [837]:
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [838]:
# Gosh I wasted time using the google api. api gets timed out all the time. Foursqure api also provides decent 
# data but coursera gives option of only google api or the use of the CSV file provided. Hence used the CSV file
# Load thje file
geo_data = pd.read_csv('Geospatial_Coordinates.csv')
# Set the index to Postal code so that we can look up directly
geo_data.set_index('Postal Code',inplace=True)

def getlatlong(postalcode):
    lat = None
    long = None
    try:
        lat = geo_data.loc[postalcode][0]
    except:
        pass
    try:
        long = geo_data.loc[postalcode][1]
    except:
        pass
    return lat,long
 


In [839]:
df['latitude'] = df['Postcode'].apply(lambda x : getlatlong(x)[0]) # Add latitude column
df['longitude'] = df['Postcode'].apply(lambda x : getlatlong(x)[1])# Add longitude column

In [840]:
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,latitude,longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [841]:
df.shape

(103, 5)

In [842]:
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,latitude,longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [843]:
# Import folium and load th map for Toronto, Canada

import folium
address = 'Toronto,Canada'
geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude

In [844]:
map_t = folium.Map(location = [latitude, longitude], zoom_start = 10)

In [845]:
# Mark all the Borough / Neighbourhoods using folium
for lat,long,borough,neighbourhood in zip(df['latitude'],df['longitude'],df['Borough'],df['Neighbourhood']):    
   label = '{}, {}'.format(neighbourhood, borough)
   label = folium.Popup(label, parse_html=True)
   folium.CircleMarker(
        [lat, long],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#4186FF',
        fill_opacity=0.7).add_to(map_t) 

In [846]:
map_t

In [847]:
df['Borough'].unique()

array(['Scarborough', 'North York', 'East York', 'East Toronto',
       'Central Toronto', 'Downtown Toronto', 'York', 'West Toronto',
       "Queen's Park", 'Mississauga', 'Etobicoke'], dtype=object)

In [848]:
#Picked the first element for analysis to make the map bit more readable and ease -->Scarborough
address = 'Scarborough,Canada'
geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print(latitude,longitude)

43.773077 -79.257774


In [849]:
df_scarborough = df[df['Borough']=='Scarborough']

In [850]:
df_scarborough

Unnamed: 0,Postcode,Borough,Neighbourhood,latitude,longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park,Ionview,Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea,Golden Mile,Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest,Cliffside,Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff,Cliffside West",43.692657,-79.264848


In [851]:
map_scarborough = folium.Map(location = [latitude, longitude], zoom_start = 10)

In [852]:
map_scarborough

In [853]:
# Mark all the neighbourhood for scarborough
for lat,long,borough,neighbourhood in zip(df_scarborough['latitude'],df_scarborough['longitude'],df_scarborough['Borough'],df_scarborough['Neighbourhood']):    
   label = '{}, {}'.format(neighbourhood, borough)
   label = folium.Popup(label, parse_html=True)
   folium.CircleMarker(
        [lat, long],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#4186FF',
        fill_opacity=0.7).add_to(map_scarborough) 

In [854]:
map_scarborough

# Neighbourhood segmentation for Scarborough

1. Aim to find out all the venues in and around .Scarborough 
2. Find out the categories of each venue under each of the neighbourhoods
3. Find out the availability (mean againt the total) of the venues under each neighbourhood
4. Cluster the neighbourhood
5. Map it out using Folium and mark the different neighbourhoods in differnet colors based on the cluster

In [855]:
CLIENT_ID='ZOO51K0TRLO3V1BXETAEBMWHHIO2C0DXTW05UWSNGTBS5G0E'
CLIENT_SECRET='ODKWIW1VFAKDYQ1PQNNJQU1RQDCA2QMV0UNTHXJHPBCR422B'
VERSION='20181511'
LIMIT=30
RADIUS = 500


In [856]:
latitude = df_scarborough['latitude'][10]
longitude = df_scarborough['longitude'][10]

In [857]:
url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, RADIUS, LIMIT)

In [858]:
results = requests.get(url).json()

In [859]:
results.keys()

dict_keys(['meta', 'response'])

In [860]:
results['response'].keys()

dict_keys(['venues', 'confident'])

In [861]:
results['response']['venues'][0].keys()

dict_keys(['id', 'name', 'location', 'categories', 'referralId', 'hasPerk'])

In [862]:
venues = results['response']['venues']
df_venues = json_normalize(venues)
df_venues.head()

Unnamed: 0,categories,hasPerk,id,location.address,location.cc,location.city,location.country,location.crossStreet,location.distance,location.formattedAddress,location.labeledLatLngs,location.lat,location.lng,location.postalCode,location.state,name,referralId
0,"[{'id': '4bf58dd8d48988d124951735', 'name': 'A...",False,5228afe611d2a57901449823,145 Nantucket,CA,Scarborough,Canada,,80,"[145 Nantucket, Scarborough ON, Canada]","[{'label': 'display', 'lat': 43.75715341768399...",43.757153,-79.274237,,ON,TireCraft,v-1543401368
1,"[{'id': '50327c8591d4c4b30a586d5d', 'name': 'B...",False,4b920246f964a52045e333e3,300 Midwest Rd.,CA,Scarborough,Canada,,288,"[300 Midwest Rd., Scarborough ON M1P 3A9, Canada]","[{'label': 'display', 'lat': 43.75947159983688...",43.759472,-79.271121,M1P 3A9,ON,Mill St. Brewery,v-1543401368
2,"[{'id': '4f04ae1f2fb6e1c99f3db0ba', 'name': 'C...",False,5b1aa51b1fa763002cf54662,180 Nantucket Blvd,CA,Toronto,Canada,,201,"[180 Nantucket Blvd, Toronto ON M1P 4R6, Canada]","[{'label': 'display', 'lat': 43.757644, 'lng':...",43.757644,-79.275789,M1P 4R6,ON,EE Auto Group,v-1543401368
3,"[{'id': '52f2ab2ebcbc57f1066b8b44', 'name': 'A...",False,5ba9050951950e002c2430d8,170 Nantucket Blvd. Unit #8,CA,Scarborough,Canada,,184,"[170 Nantucket Blvd. Unit #8, Scarborough ON M...","[{'label': 'display', 'lat': 43.75743, 'lng': ...",43.75743,-79.2756,M1P 4R6,ON,The Friendly Mechanic,v-1543401368
4,"[{'id': '4bf58dd8d48988d122951735', 'name': 'E...",False,4aec5e3bf964a52067c621e3,1306 Kennedy Road,CA,Scarborough,Canada,btwn Ellesmere Rd. & Lawrence Ave. E,501,[1306 Kennedy Road (btwn Ellesmere Rd. & Lawre...,"[{'label': 'display', 'lat': 43.75935629419452...",43.759356,-79.278931,M1P 2L5,ON,Canada Computers,v-1543401368


In [863]:
colnames = ['name','categories']+[col for col in df_venues.columns if col.startswith('location')]+['id']
df_venues_updated = df_venues.loc[:,colnames]

In [864]:
def get_category(row):
    category_list =  row['categories']
    if len(category_list)==0:
        return None
    else:
        return category_list[0]['name']

In [865]:
df_venues_updated['categories'] = df_venues_updated.apply(get_category,axis=1)

In [866]:
df_venues_updated.columns = [col.split('.')[-1] for col in df_venues_updated.columns]

In [867]:
df_venues_updated.head()

Unnamed: 0,name,categories,address,cc,city,country,crossStreet,distance,formattedAddress,labeledLatLngs,lat,lng,postalCode,state,id
0,TireCraft,Automotive Shop,145 Nantucket,CA,Scarborough,Canada,,80,"[145 Nantucket, Scarborough ON, Canada]","[{'label': 'display', 'lat': 43.75715341768399...",43.757153,-79.274237,,ON,5228afe611d2a57901449823
1,Mill St. Brewery,Brewery,300 Midwest Rd.,CA,Scarborough,Canada,,288,"[300 Midwest Rd., Scarborough ON M1P 3A9, Canada]","[{'label': 'display', 'lat': 43.75947159983688...",43.759472,-79.271121,M1P 3A9,ON,4b920246f964a52045e333e3
2,EE Auto Group,Car Wash,180 Nantucket Blvd,CA,Toronto,Canada,,201,"[180 Nantucket Blvd, Toronto ON M1P 4R6, Canada]","[{'label': 'display', 'lat': 43.757644, 'lng':...",43.757644,-79.275789,M1P 4R6,ON,5b1aa51b1fa763002cf54662
3,The Friendly Mechanic,Auto Garage,170 Nantucket Blvd. Unit #8,CA,Scarborough,Canada,,184,"[170 Nantucket Blvd. Unit #8, Scarborough ON M...","[{'label': 'display', 'lat': 43.75743, 'lng': ...",43.75743,-79.2756,M1P 4R6,ON,5ba9050951950e002c2430d8
4,Canada Computers,Electronics Store,1306 Kennedy Road,CA,Scarborough,Canada,btwn Ellesmere Rd. & Lawrence Ave. E,501,[1306 Kennedy Road (btwn Ellesmere Rd. & Lawre...,"[{'label': 'display', 'lat': 43.75935629419452...",43.759356,-79.278931,M1P 2L5,ON,4aec5e3bf964a52067c621e3


In [868]:
#Group the rows based on the categories. Looks like Automotive shop is the prominent one in thi location
df_venues_updated.groupby('categories').count()

Unnamed: 0_level_0,name,address,cc,city,country,crossStreet,distance,formattedAddress,labeledLatLngs,lat,lng,postalCode,state,id
categories,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Arts & Crafts Store,1,1,1,1,1,1,1,1,1,1,1,1,1,1
Auto Garage,1,1,1,1,1,0,1,1,1,1,1,1,1,1
Automotive Shop,10,6,10,6,10,1,10,10,10,10,10,2,7,10
Brewery,1,1,1,1,1,0,1,1,1,1,1,1,1,1
Building,2,1,2,1,2,0,2,2,2,2,2,1,2,2
Car Wash,1,1,1,1,1,0,1,1,1,1,1,1,1,1
Doctor's Office,1,0,1,0,1,0,1,1,1,1,1,0,0,1
Electronics Store,1,1,1,1,1,1,1,1,1,1,1,1,1,1
Factory,1,0,1,0,1,0,1,1,1,1,1,0,0,1
Furniture / Home Store,1,1,1,1,1,0,1,1,1,1,1,1,1,1


In [869]:
#Collect Venues for all the neighbourhoods

df_venue_all = pd.DataFrame()
for neighbourhood,neigh_lat,neigh_long  in zip(df_scarborough['Neighbourhood'],df_scarborough['latitude'],df_scarborough['longitude']):
    #print(neigh_lat,neigh_long)
    neighbourhood_lst.append(neighbourhood)
    neigh_lat_lst.append(neigh_lat)
    neigh_long_lst.append(neigh_long)
    url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, neigh_lat, neigh_long, VERSION, RADIUS, LIMIT)
    results = requests.get(url).json()
    venues = results['response']['venues']
    df_venues = json_normalize(venues)
    #print(df_venues.shape)
    df_venues['Neighbourhood'] = neighbourhood
    df_venues['Neigh_latitude'] = neigh_lat
    df_venues['Neigh_longitude'] = neigh_long
    df_venue_all = df_venue_all.append(df_venues)
    #print(df_venue_all.shape)


In [870]:
df_venue_all['categories'] = df_venue_all.apply(get_category,axis=1)

In [871]:
df_venues_all_grp = df_venue_all.groupby(['Neighbourhood','categories']).count()

In [872]:
df_venues_all_grp.reset_index(inplace=True)


In [873]:
'''
#Below code should not be cut loose. Daily quota will get exhausted and will get stuck

ratings=[]
for vid in df_venues_updated['id']:
    VENUE_ID = vid
    url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format(VENUE_ID,CLIENT_ID, CLIENT_SECRET,VERSION)
    result = requests.get(url).json()
    if 'rating' in result['response']['venue'].keys():
        r = result['response']['venue']['rating']
        ratings.append(r)
    else:
        ratings.append('0')

df_venues_updated['Ratings'] = ratings
#Storing the values to CSV so that api need not be called againg and again
#df_venues_updated.to_csv('df_venues_updated.csv')
''' 

"\n#Below code should not be cut loose. Daily quota will get exhausted and will get stuck\n\nratings=[]\nfor vid in df_venues_updated['id']:\n    VENUE_ID = vid\n    url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format(VENUE_ID,CLIENT_ID, CLIENT_SECRET,VERSION)\n    result = requests.get(url).json()\n    if 'rating' in result['response']['venue'].keys():\n        r = result['response']['venue']['rating']\n        ratings.append(r)\n    else:\n        ratings.append('0')\n\ndf_venues_updated['Ratings'] = ratings\n#Storing the values to CSV so that api need not be called againg and again\n#df_venues_updated.to_csv('df_venues_updated.csv')\n"

In [874]:
df_venues_all_grp_d = pd.get_dummies(df_venues_all_grp[['categories']], prefix="", prefix_sep="")


df_venues_all_grp_d['Neighbourhood'] = df_venues_all_grp['Neighbourhood'] 

In [875]:
df_venues_all_grp_d_mean = df_venues_all_grp_d.groupby('Neighbourhood').mean() # summation of each row needs to be one. Higher the
#number, more prominant in is the category.  We now have numerical values which can be used to get the distance meaurement while runing kmeans
df_venues_all_grp_d_mean

Unnamed: 0_level_0,Advertising Agency,Arts & Crafts Store,Assisted Living,Auto Dealership,Auto Garage,Automotive Shop,Bakery,Bank,Bar,Beach,...,Theme Park,Thrift / Vintage Store,Trade School,Trail,Train Station,Video Game Store,Vietnamese Restaurant,Warehouse,Yoga Studio,Zoo Exhibit
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Agincourt,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Agincourt North,L'Amoreaux East,Milliken,Steeles East",0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Birch Cliff,Cliffside West",0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Cedarbrae,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.083333,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Clairlea,Golden Mile,Oakridge",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.055556,0.055556,0.0,0.055556,0.0,0.0,0.0,0.0
"Clarks Corners,Sullivan,Tam O'Shanter",0.0,0.0,0.0,0.043478,0.0,0.043478,0.0,0.043478,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Cliffcrest,Cliffside,Scarborough Village West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.047619,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Dorset Park,Scarborough Town Centre,Wexford Heights",0.0,0.076923,0.0,0.0,0.076923,0.076923,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"East Birchmount Park,Ionview,Kennedy Park",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0
"Guildwood,Morningside,West Hill",0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.05,0.0,0.0,...,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0


In [876]:
# Define 5 clusters and find the cluster name
from sklearn.cluster import KMeans
from sklearn.datasets.samples_generator import make_blobs
import matplotlib.pyplot as plt
%matplotlib inline

num_clusters=5
k_means = KMeans(init = 'k-means++',n_clusters=num_clusters,n_init=12)
k_means.fit(df_venues_all_grp_d_mean)
df_venues_all_grp_d_mean['Cluster'] = k_means.labels_

In [877]:
df_venues_all_grp_d_mean.head()

Unnamed: 0_level_0,Advertising Agency,Arts & Crafts Store,Assisted Living,Auto Dealership,Auto Garage,Automotive Shop,Bakery,Bank,Bar,Beach,...,Thrift / Vintage Store,Trade School,Trail,Train Station,Video Game Store,Vietnamese Restaurant,Warehouse,Yoga Studio,Zoo Exhibit,Cluster
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Agincourt,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
"Agincourt North,L'Amoreaux East,Milliken,Steeles East",0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4
"Birch Cliff,Cliffside West",0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3
Cedarbrae,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.083333,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
"Clairlea,Golden Mile,Oakridge",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.055556,0.055556,0.0,0.055556,0.0,0.0,0.0,0.0,2


In [878]:
def add_color(cno):
    if cno==0:
        return '#30412e'
    elif cno==1:
        return '#ebf118'
    elif cno==2:
        return '#6b0ae8'
    elif cno==3:
        return '#2bf4e2'
    else:
        return '2bf4e2'

df_venues_all_grp_d_mean['color'] = df_venues_all_grp_d_mean['Cluster'].apply(add_color)
    

In [879]:
df_venues_all_grp_d_mean.head()

Unnamed: 0_level_0,Advertising Agency,Arts & Crafts Store,Assisted Living,Auto Dealership,Auto Garage,Automotive Shop,Bakery,Bank,Bar,Beach,...,Trade School,Trail,Train Station,Video Game Store,Vietnamese Restaurant,Warehouse,Yoga Studio,Zoo Exhibit,Cluster,color
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Agincourt,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,#ebf118
"Agincourt North,L'Amoreaux East,Milliken,Steeles East",0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4,2bf4e2
"Birch Cliff,Cliffside West",0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3,#2bf4e2
Cedarbrae,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.083333,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,#30412e
"Clairlea,Golden Mile,Oakridge",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.055556,0.055556,0.0,0.055556,0.0,0.0,0.0,0.0,2,#6b0ae8


In [880]:
address = 'Scarborough,Canada'
geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print(latitude,longitude)

43.773077 -79.257774


In [881]:
map_scarborough = folium.Map(location = [latitude, longitude], zoom_start = 11)

In [882]:
for lat,long,borough,neighbourhood in zip(df_scarborough['latitude'],df_scarborough['longitude'],df_scarborough['Borough'],df_scarborough['Neighbourhood']):    
   label = '{}, {}'.format(neighbourhood, borough)
   label = folium.Popup(label, parse_html=True)
   folium.CircleMarker(
        [lat, long],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color=df_venues_all_grp_d_mean.loc[neighbourhood,'color'],
        fill_opacity=0.7).add_to(map_scarborough) 
map_scarborough