## Import dependencies

In [2]:
"""
WEB SCRAPING TORONTO DATA FROM WIKIPEDIA
GETTING GEOLOCATION DATA 
ANALYSING NEIGHBOURHOOD DATA
"""
import pandas as pd
import requests
import bs4 as bs
import geocoder
import numpy as np
import folium
import math

import json
from pandas.io.json import json_normalize
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors

# WEB SCRAPING TORONTO DATA FROM WIKIPEDIA

Define the url of the wikipedia page and load the relevant table into a beautiful soup object

In [3]:
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

#download a webpage as html data in a scrape with:
Web = requests.get(url)

#html content can then be analysed with:
soup = bs.BeautifulSoup(Web.text, "html.parser")


table_contents=[]
table=soup.find('table')

Parse the table to collect relevant information for our dataframe

In [4]:
for row in table.findAll('td'):
    cell = {}
    if row.span.text=='Not assigned':
        pass
    else:
        cell['PostalCode'] = row.p.text[:3]
        cell['Borough'] = (row.span.text).split('(')[0]
        cell['Neighborhood'] = (((((row.span.text).split('(')[1]).strip(')')).replace(' /',',')).replace(')',' ')).strip(' ')
        table_contents.append(cell)

#print(table_contents)

Convert the table into a dataframe 

In [5]:
df=pd.DataFrame(table_contents)
df['Borough']=df['Borough'].replace({'Downtown TorontoStn A PO Boxes25 The Esplanade':'Downtown Toronto Stn A',
                                             'East TorontoBusiness reply mail Processing Centre969 Eastern':'East Toronto Business',
                                             'EtobicokeNorthwest':'Etobicoke Northwest','East YorkEast Toronto':'East York/East Toronto',
                                             'MississaugaCanada Post Gateway Processing Centre':'Mississauga'})
df

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto Business,Enclave of M4L
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


# GETTING GEOLOCATION DATA 

Collecting the data using the geocoder package. The package does not fetch any data. The code to use it has been included but instead the csv is used.

In [6]:
"""
Geocoder version, wouldnt complete API call so alternative version is below.
"""
# #Get the list of postal code information
# postal_list = list(df["PostalCode"])

# latitude = []
# longitude = []
# #DO THIS IN A LOOP FOR EVERY POSTAL CODE
# for i in range(len(postal_list)):
#     postal_code = postal_list[i]
#     # initialize your variable to None
#     lat_lng_coords = None
    
#     # loop until you get the coordinates
#     while(lat_lng_coords is None):
#       g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))
#       lat_lng_coords = g.latlng
   
#     #Assign the coordinates to the dataframe
#     latitude.append(lat_lng_coords[0])
#     longitude.append(lat_lng_coords[1])
    
# df["lat"] = latitude
# df["long"] = longitude

# #Get the list of postal code information
# postal_list = list(df["PostalCode"])
"""
Reading the latlong from the csv provided and adding to our dataframe
"""
geo_df = pd.read_csv("/Users/stanleyrichards/Documents/Code/Notebooks/Geospatial_Coordinates.csv")

geo_df

postal_list = list(df["PostalCode"])
latitude = []
longitude = []
#DO THIS IN A LOOP FOR EVERY POSTAL CODE
for i in range(len(postal_list)):
    postal_code = postal_list[i]
    # Read the csv for the latlong
    lat = geo_df.loc[geo_df['Postal Code'] == postal_code, 'Latitude'].values[0]
    long = geo_df.loc[geo_df['Postal Code'] == postal_code, 'Longitude'].values[0]
    lat_lng_coords = [lat, long]
    
    
    #Assign the coordinates to the dataframe
    latitude.append(lat_lng_coords[0])
    longitude.append(lat_lng_coords[1])
    
df["Latitude"] = latitude
df["Longitude"] = longitude
pd.set_option('display.max_rows', None)
df

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Queen's Park,Ontario Provincial Government,43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills North,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


# EXPLORING AND CLUSTERING NEIGHBORHOODS 

# Exploring Neighborhoods in Toronto

In [7]:
address = 'Toronto, Ontario'

location = geocoder.google(address)

latitude = location.latitude
longitude = location.longitude
if latitude == None:
    #Then geocoder is again not working and we will just have to do it manually
    longitude = 79.3832
    latitude = 43.6532
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6532, 79.3832.


In [8]:
map_toronto = folium.Map(location=[latitude, -longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(df['Latitude'], df['Longitude'], df['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [9]:
CLIENT_ID = '54IF2K3CPOYH1NM4ZUWRMZVYI53TJFRKYB3VASQOXN5PMU3P' # your Foursquare ID
CLIENT_SECRET = 'YAEJEAZYCIB5IY413540YA3DC3F2JM5IPGEBCOCP5GAOBWIF' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 54IF2K3CPOYH1NM4ZUWRMZVYI53TJFRKYB3VASQOXN5PMU3P
CLIENT_SECRET:YAEJEAZYCIB5IY413540YA3DC3F2JM5IPGEBCOCP5GAOBWIF


# Analyse the first neighborhood

Getting the name and location details of the first neighborhood from the dataframe created above

In [10]:
print("The first neighborhood is " + df.loc[0, 'Neighborhood'])

neighborhood_latitude = df.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = df.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = df.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

The first neighborhood is Parkwoods
Latitude and longitude values of Parkwoods are 43.7532586, -79.3296565.


Creating a foursquare api access URL

In [11]:
LIMIT = 100 # limit of number of venues returned by Foursquare API

radius = 500 # define radius


url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url # display URL

'https://api.foursquare.com/v2/venues/explore?&client_id=54IF2K3CPOYH1NM4ZUWRMZVYI53TJFRKYB3VASQOXN5PMU3P&client_secret=YAEJEAZYCIB5IY413540YA3DC3F2JM5IPGEBCOCP5GAOBWIF&v=20180605&ll=43.7532586,-79.3296565&radius=500&limit=100'

Getting the results of the API call in json format and defining a function to parse this json into a dataframe.

In [12]:
results = requests.get(url).json()

# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

Creating a dataframe of events returned by the API call for this first neighborhood.

In [13]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

  nearby_venues = json_normalize(venues) # flatten JSON


Unnamed: 0,name,categories,lat,lng
0,KFC,Fast Food Restaurant,43.754387,-79.333021
1,Brookbanks Park,Park,43.751976,-79.33214
2,Variety Store,Food & Drink Shop,43.751974,-79.333114


In [14]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

3 venues were returned by Foursquare.


This first neighborhood has few venues around it.

# Analyse All Neighborhoods

First defining a function to do the above analysis of nearby neighborhoods automatically.

In [15]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

Now using this function we can loop through the neighborhoods in our dataframe containing all venues returned.

In [16]:
toronto_venues = getNearbyVenues(df['Neighborhood'], df['Latitude'], df['Longitude'], radius=500)
#toronto_venues.head()

Parkwoods
Victoria Village
Regent Park, Harbourfront
Lawrence Manor, Lawrence Heights
Ontario Provincial Government
Islington Avenue
Malvern, Rouge
Don Mills North
Parkview Hill, Woodbine Gardens
Garden District, Ryerson
Glencairn
West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale
Rouge Hill, Port Union, Highland Creek
Don Mills South
Woodbine Heights
St. James Town
Humewood-Cedarvale
Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood
Guildwood, Morningside, West Hill
The Beaches
Berczy Park
Caledonia-Fairbanks
Woburn
Leaside
Central Bay Street
Christie
Cedarbrae
Hillcrest Village
Bathurst Manor, Wilson Heights, Downsview North
Thorncliffe Park
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Scarborough Village
Fairview, Henry Farm, Oriole
Northwood Park, York University
The Danforth  East
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
Kennedy Park, Ionview, East Birchmount Park
Bayview Village
Downsview East
The Danforth

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,KFC,43.754387,-79.333021,Fast Food Restaurant
1,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
2,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
3,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
4,Victoria Village,43.725882,-79.315572,Portugril,43.725819,-79.312785,Portuguese Restaurant


In [17]:
print("A total of {} venues have been returned".format(toronto_venues.shape[0]))
print("Grouped by neighborhood:")
#toronto_venues.groupby('Neighborhood').count()

A total of 1992 venues have been returned
Grouped by neighborhood:


Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,4,4,4,4,4,4
"Alderwood, Long Branch",7,7,7,7,7,7
"Bathurst Manor, Wilson Heights, Downsview North",17,17,17,17,17,17
Bayview Village,4,4,4,4,4,4
"Bedford Park, Lawrence Manor East",23,23,23,23,23,23
Berczy Park,46,46,46,46,46,46
"Birch Cliff, Cliffside West",4,4,4,4,4,4
"Brockton, Parkdale Village, Exhibition Place",22,22,22,22,22,22
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",16,16,16,16,16,16
Caledonia-Fairbanks,4,4,4,4,4,4


In [18]:
print('There are {} unique venue categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 257 unique venue categories.


# Cluster Neighborhoods

To cluster the neighborhoods in Toronto we will compare the venue categories in each neighborhood.

In [19]:
#Turning the categorical variable into a one hot encoded numerical values
toronto_onehot = None
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")
toronto_onehot.drop(columns = ['Neighborhood'], inplace=True)


# add neighborhood column back to dataframe in the first index
data_ins = toronto_venues['Neighborhood']
toronto_onehot.insert(0, 'Neighborhood', data_ins)

#Display resulting dataframe
#toronto_onehot.head()

Unnamed: 0,Neighborhood,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


This dataframe can then be grouped by neighborhood, taking the mean of occurence in each category. This results in 101 neighborhoods each with 272 mean occurences for the various venue categories.

In [20]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
#toronto_grouped
#toronto_grouped.shape

Unnamed: 0,Neighborhood,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Bedford Park, Lawrence Manor East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,"Birch Cliff, Cliffside West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,"Brockton, Parkdale Village, Exhibition Place",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"CN Tower, King and Spadina, Railway Lands, Har...",0.0,0.0,0.0,0.0625,0.0625,0.125,0.1875,0.125,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Caledonia-Fairbanks,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0


In [21]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
#     print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
#     print('\n')

----Agincourt----
                       venue  freq
0                     Lounge  0.25
1             Breakfast Spot  0.25
2  Latin American Restaurant  0.25
3               Skating Rink  0.25
4          Accessories Store  0.00


----Alderwood, Long Branch----
            venue  freq
0     Pizza Place  0.29
1        Pharmacy  0.14
2  Sandwich Place  0.14
3             Gym  0.14
4     Coffee Shop  0.14


----Bathurst Manor, Wilson Heights, Downsview North----
                venue  freq
0                Bank  0.12
1         Coffee Shop  0.12
2            Pharmacy  0.06
3  Chinese Restaurant  0.06
4         Bridal Shop  0.06


----Bayview Village----
                 venue  freq
0  Japanese Restaurant  0.25
1                 Bank  0.25
2   Chinese Restaurant  0.25
3                 Café  0.25
4    Mobile Phone Shop  0.00


----Bedford Park, Lawrence Manor East----
               venue  freq
0         Restaurant  0.09
1     Sandwich Place  0.09
2        Coffee Shop  0.09
3                

In [66]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Lounge,Latin American Restaurant,Skating Rink,Breakfast Spot,Dessert Shop,Escape Room,Electronics Store,Eastern European Restaurant,Drugstore,Donut Shop
1,"Alderwood, Long Branch",Pizza Place,Coffee Shop,Gym,Pharmacy,Sandwich Place,Pub,Yoga Studio,Diner,Department Store,Dessert Shop
2,"Bathurst Manor, Wilson Heights, Downsview North",Coffee Shop,Bank,Diner,Bridal Shop,Sushi Restaurant,Ice Cream Shop,Mobile Phone Shop,Sandwich Place,Chinese Restaurant,Restaurant
3,Bayview Village,Chinese Restaurant,Café,Japanese Restaurant,Bank,Yoga Studio,Diner,Discount Store,Distribution Center,Dog Run,Dessert Shop
4,"Bedford Park, Lawrence Manor East",Coffee Shop,Restaurant,Sandwich Place,Liquor Store,Butcher,Pub,Café,Italian Restaurant,Sushi Restaurant,Fast Food Restaurant


# IDENTIFYING POTENTIAL NEIGHBOURHOODS

Looking for neighbourhoods that do not feature coffee shops. Then filter by ones that do contain parks.

In [79]:
without_coffee = neighborhoods_venues_sorted[~neighborhoods_venues_sorted.apply(lambda r: r.str.contains('Coffee Shop', case=False).any(), axis=1)] 
with_parks = without_coffee[without_coffee.apply(lambda r: r.str.contains('Park', case=False).any(), axis=1)] 
with_parks

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
9,Caledonia-Fairbanks,Park,Women's Store,Pool,Distribution Center,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Doner Restaurant
19,Davisville North,Gym / Fitness Center,Hotel,Playground,Department Store,Sandwich Place,Breakfast Spot,Food & Drink Shop,Park,General Entertainment,Curling Ice
23,"Dorset Park, Wexford Heights, Scarborough Town...",Indian Restaurant,Pet Store,Vietnamese Restaurant,Chinese Restaurant,Brewery,Light Rail Station,Gluten-free Restaurant,Gift Shop,Eastern European Restaurant,Drugstore
25,Downsview East,Park,Airport,Yoga Studio,Dog Run,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Donut Shop
27,Downsview West,Grocery Store,Bank,Hotel,Shopping Mall,Park,Eastern European Restaurant,Drugstore,Donut Shop,Doner Restaurant,Department Store
30,Enclave of M4L,Light Rail Station,Park,Spa,Skate Park,Auto Workshop,Burrito Place,Fast Food Restaurant,Farmers Market,Garden,Garden Center
35,Forest Hill North & West,Jewelry Store,Trail,Park,Sushi Restaurant,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Yoga Studio
37,Glencairn,Park,Asian Restaurant,Italian Restaurant,Metro Station,Yoga Studio,Distribution Center,Dim Sum Restaurant,Diner,Discount Store,Dog Run
38,"Golden Mile, Clairlea, Oakridge",Bakery,Park,Intersection,Metro Station,Bus Line,Bus Station,Ice Cream Shop,Soccer Field,Convenience Store,Electronics Store
41,"High Park, The Junction South",Café,Mexican Restaurant,Bar,Thai Restaurant,Fast Food Restaurant,Speakeasy,Diner,Italian Restaurant,Bakery,Bookstore


In [81]:
merge_df = pd.merge(with_parks, df, how="left", on=["Neighborhood", "Neighborhood"])
merge_df

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,PostalCode,Borough,Latitude,Longitude
0,Caledonia-Fairbanks,Park,Women's Store,Pool,Distribution Center,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Doner Restaurant,M6E,York,43.689026,-79.453512
1,Davisville North,Gym / Fitness Center,Hotel,Playground,Department Store,Sandwich Place,Breakfast Spot,Food & Drink Shop,Park,General Entertainment,Curling Ice,M4P,Central Toronto,43.712751,-79.390197
2,"Dorset Park, Wexford Heights, Scarborough Town...",Indian Restaurant,Pet Store,Vietnamese Restaurant,Chinese Restaurant,Brewery,Light Rail Station,Gluten-free Restaurant,Gift Shop,Eastern European Restaurant,Drugstore,M1P,Scarborough,43.75741,-79.273304
3,Downsview East,Park,Airport,Yoga Studio,Dog Run,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Donut Shop,M3K,North York,43.737473,-79.464763
4,Downsview West,Grocery Store,Bank,Hotel,Shopping Mall,Park,Eastern European Restaurant,Drugstore,Donut Shop,Doner Restaurant,Department Store,M3L,North York,43.739015,-79.506944
5,Enclave of M4L,Light Rail Station,Park,Spa,Skate Park,Auto Workshop,Burrito Place,Fast Food Restaurant,Farmers Market,Garden,Garden Center,M7Y,East Toronto Business,43.662744,-79.321558
6,Forest Hill North & West,Jewelry Store,Trail,Park,Sushi Restaurant,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Yoga Studio,M5P,Central Toronto,43.696948,-79.411307
7,Glencairn,Park,Asian Restaurant,Italian Restaurant,Metro Station,Yoga Studio,Distribution Center,Dim Sum Restaurant,Diner,Discount Store,Dog Run,M6B,North York,43.709577,-79.445073
8,"Golden Mile, Clairlea, Oakridge",Bakery,Park,Intersection,Metro Station,Bus Line,Bus Station,Ice Cream Shop,Soccer Field,Convenience Store,Electronics Store,M1L,Scarborough,43.711112,-79.284577
9,"High Park, The Junction South",Café,Mexican Restaurant,Bar,Thai Restaurant,Fast Food Restaurant,Speakeasy,Diner,Italian Restaurant,Bakery,Bookstore,M6P,West Toronto,43.661608,-79.464763


# POPULATION ANALYSIS

In [84]:
import pandas as pd
import json
neigh_geo = json.load(open("/Users/stanleyrichards/Documents/Coursera/TorontoGeoJson"))

map_toronto2  = folium.Map(location=[latitude, -longitude], zoom_start=11)

map_toronto2.choropleth(geo_data=neigh_geo,
    data = df_pop,
    columns=['Geographic code','Population, 2016'],
    key_on='feature.properties.CFSAUID',
    fill_color='YlOrRd',
    fill_opacity=0.7, 
    line_opacity=0.2,
    legend_name='Population by FSA')  

map_toronto2

#Add this to the map with the markers

# OVERLAYING NEIGHBOURHOODS WITH POPULATION DENSITY MAP

In [85]:
#ADD LOCATION MARKERS: Replace this with just the ones that are sans coffee shops and with other good stuff.
for lat, lng, label in zip(merge_df['Latitude'], merge_df['Longitude'], merge_df['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto2) 
    
map_toronto2