# The Battle of the Neighbourhoods

## Loading of the necesary libraries

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

print('Libraries imported.')

Libraries imported.


## Getting the Population of Toronto by Postal Code

### Wikipedia provides a page with all the postal code prefixes for Toronto, noting the neighbourhoods to which they belong.

In [2]:
# Scrape postal code data from the Toronto postal code Wikipedia page.

postalcodedata = pd.read_html("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")

In [3]:
# Locate the proper table.

postalcodedata[0]

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
7,M8A,Not assigned,Not assigned
8,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
9,M1B,Scarborough,"Malvern, Rouge"


In [4]:
# Convert the postal code data into a dataframe.

postalcodedf = postalcodedata[0]
postalcodedf.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [5]:
# Postal codes that are not assigned will not be used.  Therefore, they will be dropped from the dataframe.

filteredpostalcodedata = postalcodedf[postalcodedf.Borough != "Not assigned"]
filteredpostalcodedata.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


### Stats Canada provides a web page where the population of each postal code prefix is given.

In [6]:
# Scrape the Stats Canada website for the population data

postalcodepop = pd.read_html("https://www12.statcan.gc.ca/census-recensement/2016/dp-pd/hlt-fst/pd-pl/Table.cfm?Lang=Eng&T=1201&SR=1&S=22&O=A&RPP=9999&PR=0")

In [7]:
# Find the correct table.

postalcodepop[0]

Unnamed: 0,Geographic name,"Population, 2016","Total private dwellings, 2016","Private dwellings occupied by usual residents, 2016"
0,,,,
1,CanadaFootnote 1,35151728.0,15412443.0,14072079.0
2,A0A,46587.0,26155.0,19426.0
3,A0B,19792.0,13658.0,8792.0
4,A0C,12587.0,8010.0,5606.0
5,A0E,22294.0,12293.0,9603.0
6,A0G,35266.0,21750.0,15200.0
7,A0H,17804.0,9928.0,7651.0
8,A0J,7880.0,4813.0,3426.0
9,A0K,26058.0,15159.0,11090.0


In [8]:
# Create a dataframe.

postalcodepopdf = postalcodepop[0]
postalcodepopdf.head()

Unnamed: 0,Geographic name,"Population, 2016","Total private dwellings, 2016","Private dwellings occupied by usual residents, 2016"
0,,,,
1,CanadaFootnote 1,35151728.0,15412443.0,14072079.0
2,A0A,46587.0,26155.0,19426.0
3,A0B,19792.0,13658.0,8792.0
4,A0C,12587.0,8010.0,5606.0


In [9]:
# Drop postal codes not associated to Toronto from the bottom of the dataframe.

popdf = postalcodepopdf[:-645]
popdf.head()

Unnamed: 0,Geographic name,"Population, 2016","Total private dwellings, 2016","Private dwellings occupied by usual residents, 2016"
0,,,,
1,CanadaFootnote 1,35151728.0,15412443.0,14072079.0
2,A0A,46587.0,26155.0,19426.0
3,A0B,19792.0,13658.0,8792.0
4,A0C,12587.0,8010.0,5606.0


In [10]:
# Drop the top non-Toronto postal codes.

pop = popdf.drop(popdf.index[0:896])
pop.head()

Unnamed: 0,Geographic name,"Population, 2016","Total private dwellings, 2016","Private dwellings occupied by usual residents, 2016"
896,M1B,66108.0,20957.0,20230.0
897,M1C,35626.0,11588.0,11274.0
898,M1E,46943.0,17637.0,17161.0
899,M1G,29690.0,10116.0,9767.0
900,M1H,24383.0,9274.0,8985.0


In [11]:
# Drop all unnecessary columns for the work to be done.

popfinal = pop.drop(['Total private dwellings, 2016', 'Private dwellings occupied by usual residents, 2016'], axis=1)
popfinal.head()

Unnamed: 0,Geographic name,"Population, 2016"
896,M1B,66108.0
897,M1C,35626.0
898,M1E,46943.0
899,M1G,29690.0
900,M1H,24383.0


In [12]:
# Rename the "Geographic name" to "Postal Code" to allow merging.

popfinal = popfinal.rename(columns={'Geographic name': 'Postal Code'})
popfinal.head()

Unnamed: 0,Postal Code,"Population, 2016"
896,M1B,66108.0
897,M1C,35626.0
898,M1E,46943.0
899,M1G,29690.0
900,M1H,24383.0


In [13]:
# Merge the two dataframes.

merged = pd.merge(filteredpostalcodedata, popfinal, on="Postal Code")
merged.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,"Population, 2016"
0,M3A,North York,Parkwoods,34615.0
1,M4A,North York,Victoria Village,14443.0
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",41078.0
3,M6A,North York,"Lawrence Manor, Lawrence Heights",21048.0
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",10.0


### Get the cooridnates for the Toronto postal codes.

In [14]:
# Read the CSV file.

data2 = pd.read_csv("http://cocl.us/Geospatial_data/Geospacial_coordinates.csv")

In [15]:
# Create a dataframe from the data.

coordinates = data2
coordinates.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [16]:
# Merge the latitude and longitude values with the postal code and population data.

populationdata = pd.merge(merged, coordinates, on="Postal Code")
populationdata.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,"Population, 2016",Latitude,Longitude
0,M3A,North York,Parkwoods,34615.0,43.753259,-79.329656
1,M4A,North York,Victoria Village,14443.0,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",41078.0,43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",21048.0,43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",10.0,43.662301,-79.389494


## Extraction of Foursquare Data

### Get the Coordinates of Toronto, Ontario

In [17]:
# Use the geopy library to get the coordinates of Toronto, Ontario.

address = 'Toronto, ON'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of Toronto are 43.6534817, -79.3839347.


### Define Foursquare Credentials

In [18]:
CLIENT_ID = 'K0TFLQE1PUT0QSHGVODLY3LVVJPHQVHOHSTGJVWXWSO2X2QU' # your Foursquare ID
CLIENT_SECRET = '3NB0ZRIHEED0Y1JVFVFPV24O1QHOKT4Z25TIHBL5IBKRMBWZ' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: K0TFLQE1PUT0QSHGVODLY3LVVJPHQVHOHSTGJVWXWSO2X2QU
CLIENT_SECRET:3NB0ZRIHEED0Y1JVFVFPV24O1QHOKT4Z25TIHBL5IBKRMBWZ


### Get Dental Clinic Data from Foursquare

In [19]:
# Set the desired parameters.

search_query = 'Dental'
radius = 100000
LIMIT = 10000
url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, search_query, radius, LIMIT)
url

'https://api.foursquare.com/v2/venues/search?client_id=K0TFLQE1PUT0QSHGVODLY3LVVJPHQVHOHSTGJVWXWSO2X2QU&client_secret=3NB0ZRIHEED0Y1JVFVFPV24O1QHOKT4Z25TIHBL5IBKRMBWZ&ll=43.6534817,-79.3839347&v=20180605&query=Dental&radius=100000&limit=10000'

In [20]:
# Get the results in JSON form.

results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5eee954ec546f3001b0664be'},
 'response': {'venues': [{'id': '4d10c2fa7177b1f7055c5322',
    'name': 'H&M Dental & Associates',
    'location': {'address': '810-123 Edward St',
     'crossStreet': 'Edward St & University',
     'lat': 43.6556282,
     'lng': -79.3867569,
     'labeledLatLngs': [{'label': 'display',
       'lat': 43.6556282,
       'lng': -79.3867569}],
     'distance': 329,
     'postalCode': 'M5G 1E2',
     'cc': 'CA',
     'city': 'Toronto',
     'state': 'ON',
     'country': 'Canada',
     'formattedAddress': ['810-123 Edward St (Edward St & University)',
      'Toronto ON M5G 1E2',
      'Canada']},
    'categories': [{'id': '4bf58dd8d48988d178941735',
      'name': "Dentist's Office",
      'pluralName': "Dentist's Offices",
      'shortName': "Dentist's Office",
      'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/building/medical_dentist_',
       'suffix': '.png'},
      'primary': True}],
    'venuePage': {'id':

In [21]:
# Assign relevant part of JSON to venues.

venues = results['response']['venues']

# Tranform venues into a dataframe.

dataframe = json_normalize(venues)
dataframe.head()

Unnamed: 0,categories,hasPerk,id,location.address,location.cc,location.city,location.country,location.crossStreet,location.distance,location.formattedAddress,location.labeledLatLngs,location.lat,location.lng,location.postalCode,location.state,name,referralId,venuePage.id
0,"[{'id': '4bf58dd8d48988d178941735', 'name': 'D...",False,4d10c2fa7177b1f7055c5322,810-123 Edward St,CA,Toronto,Canada,Edward St & University,329,"[810-123 Edward St (Edward St & University), T...","[{'label': 'display', 'lat': 43.6556282, 'lng'...",43.655628,-79.386757,M5G 1E2,ON,H&M Dental & Associates,v-1592694170,96070716.0
1,"[{'id': '4bf58dd8d48988d178941735', 'name': 'D...",False,4e0cc008d4c07c82a2df6ae8,181 University Ave.,CA,Toronto,Canada,at Adelaide St. W,489,"[181 University Ave. (at Adelaide St. W), Toro...","[{'label': 'display', 'lat': 43.64921954053825...",43.64922,-79.385406,M5H 3M7,ON,Downtown Dental Associates,v-1592694170,
2,"[{'id': '4bf58dd8d48988d178941735', 'name': 'D...",False,4e95d177f5b90ceed0850c5f,100 King St Unit C13,CA,Toronto,Canada,Bay Street,475,"[100 King St Unit C13 (Bay Street), Toronto ON...","[{'label': 'display', 'lat': 43.64936018119344...",43.64936,-79.382382,M9N 1L3,ON,Altima Dental Centre at First Canadian Place,v-1592694170,
3,"[{'id': '4bf58dd8d48988d178941735', 'name': 'D...",False,4dfa8ac5d4c064db03ac7fbb,"333 Bay St., Unit C20",CA,Toronto,Canada,at Adelaide St.,485,"[333 Bay St., Unit C20 (at Adelaide St.), Toro...","[{'label': 'display', 'lat': 43.650467, 'lng':...",43.650467,-79.379576,M5H 2R2,ON,Toothworks Bay-Adelaide Dental,v-1592694170,70646013.0
4,"[{'id': '4bf58dd8d48988d178941735', 'name': 'D...",False,4c6a983d3bad2d7fd8c2b2ee,18 King St E,CA,Toronto,Canada,at Yonge Street,542,"[18 King St E (at Yonge Street), Toronto ON M5...","[{'label': 'display', 'lat': 43.650338030648, ...",43.650338,-79.378786,M5H 1L6,ON,Yonge Dental Centre,v-1592694170,


In [22]:
# Keep only the columns that include venue name, and anything that is associated with location.

filtered_columns = ['name', 'categories'] + [col for col in dataframe.columns if col.startswith('location.')] + ['id']
dataframe_filtered = dataframe.loc[:, filtered_columns]

# Extract the category of the venue.

def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

# Filter the category for each row.

dataframe_filtered['categories'] = dataframe_filtered.apply(get_category_type, axis=1)

# Clean the column names by keeping only last term.

dataframe_filtered.columns = [column.split('.')[-1] for column in dataframe_filtered.columns]

dataframe_filtered

Unnamed: 0,name,categories,address,cc,city,country,crossStreet,distance,formattedAddress,labeledLatLngs,lat,lng,postalCode,state,id
0,H&M Dental & Associates,Dentist's Office,810-123 Edward St,CA,Toronto,Canada,Edward St & University,329,"[810-123 Edward St (Edward St & University), T...","[{'label': 'display', 'lat': 43.6556282, 'lng'...",43.655628,-79.386757,M5G 1E2,ON,4d10c2fa7177b1f7055c5322
1,Downtown Dental Associates,Dentist's Office,181 University Ave.,CA,Toronto,Canada,at Adelaide St. W,489,"[181 University Ave. (at Adelaide St. W), Toro...","[{'label': 'display', 'lat': 43.64921954053825...",43.64922,-79.385406,M5H 3M7,ON,4e0cc008d4c07c82a2df6ae8
2,Altima Dental Centre at First Canadian Place,Dentist's Office,100 King St Unit C13,CA,Toronto,Canada,Bay Street,475,"[100 King St Unit C13 (Bay Street), Toronto ON...","[{'label': 'display', 'lat': 43.64936018119344...",43.64936,-79.382382,M9N 1L3,ON,4e95d177f5b90ceed0850c5f
3,Toothworks Bay-Adelaide Dental,Dentist's Office,"333 Bay St., Unit C20",CA,Toronto,Canada,at Adelaide St.,485,"[333 Bay St., Unit C20 (at Adelaide St.), Toro...","[{'label': 'display', 'lat': 43.650467, 'lng':...",43.650467,-79.379576,M5H 2R2,ON,4dfa8ac5d4c064db03ac7fbb
4,Yonge Dental Centre,Dentist's Office,18 King St E,CA,Toronto,Canada,at Yonge Street,542,"[18 King St E (at Yonge Street), Toronto ON M5...","[{'label': 'display', 'lat': 43.650338030648, ...",43.650338,-79.378786,M5H 1L6,ON,4c6a983d3bad2d7fd8c2b2ee
5,Bond Street Dental Implants Toronto,Dentist's Office,113 Bond St,CA,Toronto,Canada,,587,"[113 Bond St, Toronto ON M5B 1Y2, Canada]","[{'label': 'display', 'lat': 43.657138, 'lng':...",43.657138,-79.378672,M5B 1Y2,ON,57523e81498ea717003c7656
6,The Plaza Dental Centre,Dentist's Office,"40 King Street West, Suite 140",CA,Toronto,Canada,,591,"[40 King Street West, Suite 140, Toronto ON M5...","[{'label': 'display', 'lat': 43.65018302942632...",43.650183,-79.37817,M5H 3Y2,ON,4bbb85632d9ea5933c819fce
7,Dawson Dental Centre,Dentist's Office,1 Queen St E #2101,CA,Toronto,Canada,,627,"[1 Queen St E #2101, Toronto ON M5C 2W5, Canada]","[{'label': 'display', 'lat': 43.65351080520015...",43.653511,-79.376146,M5C 2W5,ON,4bfd29a2f7c82d7fa1ba8d04
8,Altima Scotia Plaza Dental Centre,Dentist's Office,104 Yonge St Unit 214,CA,Toronto,Canada,,581,"[104 Yonge St Unit 214, Toronto ON M5C 2Y6, Ca...","[{'label': 'display', 'lat': 43.65006929807037...",43.650069,-79.378473,M5C 2Y6,ON,4c1fd0d9b4e62d7ff536e093
9,City Dental On Yonge,,415 Yonge St,CA,Toronto,Canada,,713,"[415 Yonge St, Toronto ON M5B 2E7, Canada]","[{'label': 'display', 'lat': 43.65973419, 'lng...",43.659734,-79.381993,M5B 2E7,ON,5bead858ccad6b0039fb5d2e


### Calculate the best location for the dental clinic.

In [23]:
# List the dataframe columns.

cols = dataframe_filtered.columns.tolist()
cols

['name',
 'categories',
 'address',
 'cc',
 'city',
 'country',
 'crossStreet',
 'distance',
 'formattedAddress',
 'labeledLatLngs',
 'lat',
 'lng',
 'postalCode',
 'state',
 'id']

In [24]:
# Isolate the longitude and latitude columns.

cols = cols[-3:] + cols[:-3]
cols

['postalCode',
 'state',
 'id',
 'name',
 'categories',
 'address',
 'cc',
 'city',
 'country',
 'crossStreet',
 'distance',
 'formattedAddress',
 'labeledLatLngs',
 'lat',
 'lng']

In [25]:
# Create a dataframe from the data.

dataframe_filtered = dataframe_filtered[cols]
dataframe_filtered.head()

Unnamed: 0,postalCode,state,id,name,categories,address,cc,city,country,crossStreet,distance,formattedAddress,labeledLatLngs,lat,lng
0,M5G 1E2,ON,4d10c2fa7177b1f7055c5322,H&M Dental & Associates,Dentist's Office,810-123 Edward St,CA,Toronto,Canada,Edward St & University,329,"[810-123 Edward St (Edward St & University), T...","[{'label': 'display', 'lat': 43.6556282, 'lng'...",43.655628,-79.386757
1,M5H 3M7,ON,4e0cc008d4c07c82a2df6ae8,Downtown Dental Associates,Dentist's Office,181 University Ave.,CA,Toronto,Canada,at Adelaide St. W,489,"[181 University Ave. (at Adelaide St. W), Toro...","[{'label': 'display', 'lat': 43.64921954053825...",43.64922,-79.385406
2,M9N 1L3,ON,4e95d177f5b90ceed0850c5f,Altima Dental Centre at First Canadian Place,Dentist's Office,100 King St Unit C13,CA,Toronto,Canada,Bay Street,475,"[100 King St Unit C13 (Bay Street), Toronto ON...","[{'label': 'display', 'lat': 43.64936018119344...",43.64936,-79.382382
3,M5H 2R2,ON,4dfa8ac5d4c064db03ac7fbb,Toothworks Bay-Adelaide Dental,Dentist's Office,"333 Bay St., Unit C20",CA,Toronto,Canada,at Adelaide St.,485,"[333 Bay St., Unit C20 (at Adelaide St.), Toro...","[{'label': 'display', 'lat': 43.650467, 'lng':...",43.650467,-79.379576
4,M5H 1L6,ON,4c6a983d3bad2d7fd8c2b2ee,Yonge Dental Centre,Dentist's Office,18 King St E,CA,Toronto,Canada,at Yonge Street,542,"[18 King St E (at Yonge Street), Toronto ON M5...","[{'label': 'display', 'lat': 43.650338030648, ...",43.650338,-79.378786


In [26]:
# Transform the data to an array for calculations.

clinic_array = dataframe_filtered.values
clinic_array

array([['M5G 1E2', 'ON', '4d10c2fa7177b1f7055c5322',
        'H&M Dental & Associates', "Dentist's Office",
        '810-123 Edward St', 'CA', 'Toronto', 'Canada',
        'Edward St & University', 329,
        list(['810-123 Edward St (Edward St & University)', 'Toronto ON M5G 1E2', 'Canada']),
        list([{'label': 'display', 'lat': 43.6556282, 'lng': -79.3867569}]),
        43.6556282, -79.3867569],
       ['M5H 3M7', 'ON', '4e0cc008d4c07c82a2df6ae8',
        'Downtown Dental Associates', "Dentist's Office",
        '181 University Ave.', 'CA', 'Toronto', 'Canada',
        'at Adelaide St. W', 489,
        list(['181 University Ave. (at Adelaide St. W)', 'Toronto ON M5H 3M7', 'Canada']),
        list([{'label': 'display', 'lat': 43.649219540538255, 'lng': -79.38540597046132}]),
        43.649219540538255, -79.38540597046132],
       ['M9N 1L3', 'ON', '4e95d177f5b90ceed0850c5f',
        'Altima Dental Centre at First Canadian Place',
        "Dentist's Office", '100 King St Unit C1

In [27]:
# Calculate the location of the clinic independent of population.

clinic_centroid = np.mean(clinic_array[:,-2:], axis=0)
clinic_centroid

array([43.655707189201074, -79.38503867442928], dtype=object)

In [28]:
# Convert the poulationdata dataframe to an array in order to calculate the centroid.

pop_array = populationdata.values
pop_array

array([['M3A', 'North York', 'Parkwoods', 34615.0, 43.7532586,
        -79.3296565],
       ['M4A', 'North York', 'Victoria Village', 14443.0,
        43.725882299999995, -79.31557159999998],
       ['M5A', 'Downtown Toronto', 'Regent Park, Harbourfront', 41078.0,
        43.6542599, -79.3606359],
       ['M6A', 'North York', 'Lawrence Manor, Lawrence Heights', 21048.0,
        43.718517999999996, -79.46476329999999],
       ['M7A', 'Downtown Toronto',
        "Queen's Park, Ontario Provincial Government", 10.0, 43.6623015,
        -79.3894938],
       ['M9A', 'Etobicoke', 'Islington Avenue, Humber Valley Village',
        35594.0, 43.6678556, -79.53224240000002],
       ['M1B', 'Scarborough', 'Malvern, Rouge', 66108.0,
        43.806686299999996, -79.19435340000001],
       ['M3B', 'North York', 'Don Mills', 13324.0, 43.745905799999996,
        -79.352188],
       ['M4B', 'East York', 'Parkview Hill, Woodbine Gardens', 18628.0,
        43.7063972, -79.309937],
       ['M5B', 'Downtown

In [29]:
# Calculate the location of the clinic using a weighted centroid.

pop_centroid = np.mean(pop_array[:,-3:], axis=0)
pop_centroid

array([26785.676470588234, 43.70527089215686, -79.39500912647058],
      dtype=object)

In [30]:
# Find the final location for the dental clinic by finding the mean location.

final_loc_lat = (43.655707189201074 + 43.70527089215686) / 2
final_loc_lon = (-79.38503867442928 - 79.39500912647058) / 2

In [31]:
print ("Therefore, the best location for the clinic is latitude ", final_loc_lat, "longitude", final_loc_lon,".")

Therefore the best location for the clinic is latitude  43.680489040678964 longitude -79.39002390044993 .
