# IBM Data Science Capstone Project Notebook

by Shan Ali

### Overview

1. Generate Neighborhood Dataset
2. Generate General Functions
3. Generate Venue Dataset
4. Explore Data: Cluster neighborhoods and generate example profiles
5. Collect Input Data: Collect previous home and work addresses and future work address
6. Generate Input Dataset: Collect venue information about previous address
7. Generate Comparison Dataset: Temp clone neighborhood dataset and add distance to future work address
8. Construct Recommender System: Run content-based recommender to rank potential neighborhoods
9. Generate Map: of Toronto with neighborhood & recommendation flags
10. Visualize Final Results

### Part 1: Generate Neighbor Dataset

In [465]:
# import relavent libraries
import numpy as np
import pandas as pd
from urllib.request import urlopen

#!conda install -c conda-forge bs4 --yes # only run first time
from bs4 import BeautifulSoup
import re

#!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim

In [466]:
# Pull html from url
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
html = urlopen(url)

# Create BeautifulSoup object
soup = BeautifulSoup(html, 'html.parser')
type(soup)

bs4.BeautifulSoup

In [467]:
# Pull rows from object
rows = soup.find_all('tr')

# Clean rows
list_rows = []
for row in rows:
    cells = row.find_all('td')
    str_cells = str(cells)
    clean = re.compile('<.*?>')
    clean2 = (re.sub(clean, '',str_cells))
    list_rows.append(clean2)
    
# Transform into df
df = pd.DataFrame(list_rows)
df = df[0].str.split(',', expand=True)
for i in range(3):
    df[i] = df[i].str.strip('[')
    df[i] = df[i].str.strip(']')
    df[i] = df[i].str.strip('\n')
    
# add columns
col = pd.DataFrame(['Postal Code','Borough','Neighborhood']).transpose()
df = df.rename(columns=col.iloc[0])

# drop extra rows/columns
df = df.iloc[1:,:3]

# drop 'not assigned' rows
df = df[df.Borough != ' Not assigned']

# split neighborhood with ','
df['Neighborhood'] = df['Neighborhood'].str.replace(' /',',')

# reset index
df.reset_index(drop=True,inplace=True)

In [468]:
# split neighborhoods in row
df1 = df
df1 = df1['Neighborhood'].str.split(',', expand=True)
df2 = df.merge(df1, on = df.index)
df2.drop(columns = ['key_0','Neighborhood'], inplace=True)

# split neighborhoods into new rows
df_new = pd.DataFrame([])

# split hoods
for row in df2.index:
    for col in df2.columns[2:]:
        if df2.iloc[row,col+2] != None:
            df_temp = df2.iloc[row,:]
            df_temp[0] = df_temp[col]
            df_new = df_new.append(df_temp, ignore_index=True)

# clean rows
df_new['P'] = df_new['Postal Code']
df_new['B'] = df_new['Borough']
df_new['N'] = df_new[0]
df_new = df_new.iloc[:-3,-3:]
df_new = df_new.rename(columns={'P':'Postal Code','B':'Borough','N':'Neighborhood'})

In [470]:
# get lat & long
longitude = pd.DataFrame([])
latitude = pd.DataFrame([])

for hood in df_new['Neighborhood']:
    address = '{}, ON'.format(hood)

    geolocator = Nominatim(user_agent="toronto_explorer")
    location = geolocator.geocode(address)
    if location != None:
        long = location.longitude
        lat = location.latitude
    else:
        long = np.nan
        lat = np.nan
    longitude = longitude.append(pd.DataFrame([long]))
    latitude = latitude.append(pd.DataFrame([lat])) 

In [471]:
# add long & lat
longitude.reset_index(drop=True, inplace=True)
latitude.reset_index(drop=True, inplace=True)

ll_df = df_new
ll_df['Longitude'] = longitude
ll_df['Latitude'] = latitude

# remove rows w/ nan
ll_df.dropna(inplace=True)
ll_df.reset_index(drop=True, inplace=True)

In [472]:
# clean of faulty long & lat coordinates
tor_df = pd.DataFrame([])

for row in ll_df.index:
    if ll_df.loc[row,'Latitude'] >= 43:
        if ll_df.loc[row,'Latitude'] <= 45:
            if ll_df.loc[row,'Longitude'] >= -80:
                if ll_df.loc[row,'Longitude'] <= -78:
                    tor_df = tor_df.append(ll_df.loc[row])

# Final data clean
tor_df.drop(columns = ['Postal Code'], inplace=True)
tor_df.reset_index(drop=True, inplace=True)
fin_col = ['Borough', 'Neighborhood', 'Latitude', 'Longitude']
tor_df = tor_df[fin_col]
print(tor_df.shape)
tor_df.head()

(165, 4)


Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,North York,Parkwoods,43.7588,-79.320197
1,North York,Victoria Village,43.732658,-79.311189
2,Downtown Toronto,Regent Park,43.660706,-79.360457
3,Downtown Toronto,Harbourfront,43.64008,-79.38015
4,North York,Lawrence Manor,43.722079,-79.437507


### Part 2: Generate Functions

In [473]:
# import libraries
import json # library to handle JSON files

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

In [474]:
# initialize for Foursqyare API
CLIENT_ID = '2LGT5DBF3CF1KCWNVP3GLC0M5MSCP330DZLZK224AJSFERY5' # your Foursquare ID
CLIENT_SECRET = '0VNMBF0U12VI1HAZUB1JG4BFXQWY2D2YU1VVBZQOWXA0I2LF' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100
radius = 500

In [480]:
# function to pull nearby data from Foursquare
def getNearbyVenues(names, latitudes, longitudes, radius=500, is_input=False):
    
    venues_list=[]
    if is_input == False:
        for name, lat, lng in zip(names, latitudes, longitudes):
            
            # create the API request URL
            url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
                CLIENT_ID, 
                CLIENT_SECRET, 
                VERSION, 
                lat, 
                lng, 
                radius, 
                LIMIT)
            
            # make the GET request
            results = requests.get(url).json()["response"]['groups'][0]['items']

            # return only relevant information for each nearby venue
            venues_list.append([(
                name, 
                lat, 
                lng, 
                v['venue']['name'], 
                v['venue']['location']['lat'], 
                v['venue']['location']['lng'],  
                v['venue']['categories'][0]['name']) for v in results])
    else:
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            latitudes, 
            longitudes, 
            radius, 
            LIMIT)

        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']

        # return only relevant information for each nearby venue
        venues_list.append([(
            names, 
            latitudes, 
            longitudes,
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [481]:
# function to one-hot a venue list
def getOneHot(nearby_venues):
    
    # one hot the list
    temp_onehot = pd.get_dummies(nearby_venues[['Venue Category']], prefix="", prefix_sep="")
    if 'Neighborhood' in temp_onehot.columns:
        temp_onehot.drop(columns='Neighborhood',inplace=True)
    
    # save to move neighborhood column to the first column
    fixed_columns = ['Neighborhood'] + list(temp_onehot.columns[:])

    # add neighborhood column back to dataframe
    temp_onehot['Neighborhood'] = nearby_venues['Neighborhood'] 

    # move neighborhood column to the first column
    temp_onehot = temp_onehot[fixed_columns]

    # get % for comparisons
    temp_grouped = temp_onehot.groupby('Neighborhood').mean().reset_index()

    return(temp_grouped)

In [482]:
# function to return most common venuew
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [483]:
# function to sort neighborhood venues
def venuesSort(grouped_venues, num_top_venues):

    indicators = ['st', 'nd', 'rd']

    # create columns according to number of top venues
    columns = ['Neighborhood']
    for ind in np.arange(num_top_venues):
        try:
            columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
        except:
            columns.append('{}th Most Common Venue'.format(ind+1))

    # create a new dataframe
    neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
    neighborhoods_venues_sorted['Neighborhood'] = grouped_venues['Neighborhood']

    for ind in np.arange(grouped_venues.shape[0]):
        neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(grouped_venues.iloc[ind, :], num_top_venues)

    return(neighborhoods_venues_sorted)

### Part 3: Generate Venue Dataset

In [None]:
# run to pull all Toronto Venues
toronto_venues = getNearbyVenues(names = tor_df['Neighborhood'],
                          latitudes = tor_df['Latitude'],
                          longitudes = tor_df['Longitude']
                         )

# run to one hot and normalize all venues
toronto_grouped = getOneHot(toronto_venues)

# run to sort and arrange top venues per hood
num_top_venues = 10
toronto_sorted = venuesSort(toronto_grouped, num_top_venues)


In [494]:
toronto_sorted.head()

Unnamed: 0,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Distance,Dissimilarity
0,1,Agincourt,Chinese Restaurant,Asian Restaurant,Korean Restaurant,Hong Kong Restaurant,Food Court,Vietnamese Restaurant,Cantonese Restaurant,Coffee Shop,Shopping Mall,Train Station,0.074065,1.120869
1,5,Agincourt North,Chinese Restaurant,Bank,Beer Store,Restaurant,Movie Theater,Clothing Store,Frozen Yogurt Shop,Fried Chicken Joint,Bakery,Coffee Shop,0.052246,0.940836
2,1,Alderwood,Pizza Place,Pharmacy,Gym,Dance Studio,Pub,Sandwich Place,Skating Rink,Coffee Shop,Event Space,Donut Shop,0.088576,1.231913
3,2,Bathurst Manor,Playground,Convenience Store,Baseball Field,Park,Fast Food Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,0.129901,1.594968
4,1,Bathurst Quay,Coffee Shop,Café,Park,Bank,Diner,Sushi Restaurant,Rental Car Location,Caribbean Restaurant,Garden,Ramen Restaurant,0.149508,1.137907


### Part 4: Explore Dataset

In [487]:
# import k-means
from sklearn.cluster import KMeans

In [491]:
# conduct k means in 6 clusters
kclusters = 6

temp_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(temp_clustering)

# check cluster labels generated for each row in the dataframe
klabels = kmeans.labels_

In [None]:
# update dfs
toronto_sorted.insert(0, 'Cluster Labels', klabels)
temp_merged = tor_df.join(toronto_sorted.set_index('Neighborhood'), on='Neighborhood')

In [493]:
temp_merged.groupby('Cluster Labels').count()

Unnamed: 0_level_0,Borough,Neighborhood,Latitude,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Distance,Dissimilarity
Cluster Labels,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
0.0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3
1.0,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46
2.0,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52
3.0,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6
4.0,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4
5.0,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51


### Part 5: Collect Input

In [495]:
# input user data here -> may make prompted eventually
previous_address = '3393 Thornberry Trail, Highland Village, TX'
previous_work_address = '1144 N Stemmons Fwy, Lewisville, TX'
future_work_address = '770 Don Mills Rd, North York, ON'

### Part 6: Generate Input Dataset

In [496]:
# generate inital dataframe with Neighborhood, Lat, and Long
input_df = pd.DataFrame({"Address": [previous_address, previous_work_address, future_work_address]})
longitude = pd.DataFrame([])
latitude = pd.DataFrame([]) 
neighborhood = pd.DataFrame([]) 

# get long & lat
for address in input_df['Address']:
    geolocator = Nominatim(user_agent="world_explorer")
    location = geolocator.geocode(address)
    if location != None:
        long = location.longitude
        lat = location.latitude
    else:
        long = np.nan
        lat = np.nan
    longitude = longitude.append(pd.DataFrame([long]))
    latitude = latitude.append(pd.DataFrame([lat])) 
    hood_temp = address.split(',')
    neighborhood = neighborhood.append(pd.DataFrame([hood_temp[1]]))
    
# prep for merging
neighborhood.reset_index(drop=True, inplace=True)
longitude.reset_index(drop=True, inplace=True)
latitude.reset_index(drop=True, inplace=True)

# merge
input_df['Neighborhood'] = neighborhood
input_df['Longitude'] = longitude
input_df['Latitude'] = latitude

input_df.head()

Unnamed: 0,Address,Neighborhood,Longitude,Latitude
0,"3393 Thornberry Trail, Highland Village, TX",Highland Village,-97.080125,33.100505
1,"1144 N Stemmons Fwy, Lewisville, TX",Lewisville,-97.010913,33.057468
2,"770 Don Mills Rd, North York, ON",North York,-79.340243,43.714838


In [497]:
# generate input venue data
name = input_df.values[0,1]
latitude = input_df.values[0,3]
longitude = input_df.values[0,2]
radius = 5000 # address is too suburb for 500 m radius + need more venues than num_top_venues
                                                
input_venues = getNearbyVenues(name, latitude, longitude, radius, is_input=True)

# run to one hot and normalize all venues
input_grouped = getOneHot(input_venues)

# run to sort and arrange top venues per hood
num_top_venues = 10
input_sorted = venuesSort(input_grouped, num_top_venues)

In [498]:
input_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Highland Village,Fast Food Restaurant,Pizza Place,Convenience Store,Restaurant,Grocery Store,Sandwich Place,Pharmacy,Coffee Shop,Burger Joint,Asian Restaurant


### Part 7: Generate Comparison Datasets

In [499]:
# import libaries
import math as m

In [500]:
# calculate distance from work
def workDis(work_address, neighborhoods,is_input=False):
    temp_dis = pd.DataFrame([])
    if is_input == False:
        work_index = 2
        long_index = 3
        lat_index = 2
    else:
        work_index = 1
        long_index = 2
        lat_index = 3
    
    for i in range(neighborhoods.shape[0]):
        dx = work_address.values[work_index,2] - neighborhoods.values[i, long_index]
        dy = work_address.values[work_index,3] - neighborhoods.values[i, lat_index]
        dis = m.sqrt(dx**2 + dy**2)
        temp_dis = temp_dis.append(pd.DataFrame([dis]))
    
    temp_dis.reset_index(drop=True, inplace=True)
    return(temp_dis)

In [501]:
# get toronto distances
distance = workDis(input_df,tor_df)
toronto_grouped['Distance'] = distance/distance.values.max()
toronto_grouped.head()

Unnamed: 0,Neighborhood,Accessories Store,Afghan Restaurant,Airport,Airport Service,American Restaurant,Animal Shelter,Antique Shop,Aquarium,Art Gallery,...,Video Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wings Joint,Women's Store,Yoga Studio,Distance,Dissimilarity
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.074065,1.120869
1,Agincourt North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.037037,0.0,0.0,0.0,0.037037,0.0,0.0,0.052246,0.940836
2,Alderwood,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.088576,1.231913
3,Bathurst Manor,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.129901,1.594968
4,Bathurst Quay,0.0,0.0,0.041667,0.041667,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.149508,1.137907


In [502]:
# get work input distances
input_distance = workDis(input_df,input_df,is_input=True)
input_grouped['Distance'] = input_distance/distance.values.max()
input_grouped.head()

Unnamed: 0,Neighborhood,American Restaurant,Asian Restaurant,BBQ Joint,Bakery,Bank,Bar,Big Box Store,Bookstore,Breakfast Spot,...,Steakhouse,Supplement Shop,Taco Place,Tex-Mex Restaurant,Toy / Game Store,Video Game Store,Wine Bar,Wine Shop,Wings Joint,Distance
0,Highland Village,0.02,0.03,0.01,0.01,0.01,0.02,0.02,0.01,0.02,...,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.124934


### Part 8: Generate Recommender

In [503]:
# initialize dataframes
input_temp = input_grouped.drop('Neighborhood', 1)
toronto_temp = toronto_grouped.drop('Neighborhood', 1)
col_temp = toronto_temp.columns.isin(input_temp.columns).tolist()
toronto_temp = toronto_temp[toronto_temp.columns[col_temp]]
col_temp = input_temp.columns.isin(toronto_temp.columns).tolist()
input_temp = input_temp[input_temp.columns[col_temp]]

In [504]:
# initialize recomender
error_list = pd.DataFrame([])

# loop through neighborhoods and caluculate distance
for i in range(toronto_temp.shape[0]):
    error = np.absolute(toronto_temp.iloc[i] - input_temp).transpose().sum()
    error_list = error_list.append(pd.DataFrame(error))

In [505]:
toronto_sorted['Dissimilarity'] = error_list.reset_index(drop=True)
toronto_ranked = toronto_sorted.sort_values('Dissimilarity',ascending=True)

In [506]:
# initiate df
temp_ranked = toronto_ranked.loc[:,['Neighborhood','Dissimilarity']].reset_index(drop=True)
temp_ranked = temp_ranked.iloc[0:5]
temp_list = temp_ranked['Neighborhood'].tolist()

lng = pd.DataFrame([])
lat = pd.DataFrame([])
bor = pd.DataFrame([])

# pull lng, lat, & bor
for hood in temp_list:
    lng = lng.append(tor_df['Longitude'].loc[tor_df['Neighborhood']==hood].tolist())
    lat = lat.append(tor_df['Latitude'].loc[tor_df['Neighborhood']==hood].tolist())
    bor = bor.append(tor_df['Borough'].loc[tor_df['Neighborhood']==hood].tolist())
    
# add to temp df
temp_ranked['Borough'] = bor.reset_index(drop=True)
temp_ranked['Longitude'] = lng.reset_index(drop=True)
temp_ranked['Latitude'] = lat.reset_index(drop=True)

ranked_df = temp_ranked
ranked_df.head()

Unnamed: 0,Neighborhood,Dissimilarity,Borough,Longitude,Latitude
0,Yorkville,0.744075,Central Toronto,-79.390168,43.671386
1,Harbord,0.751415,Downtown Toronto,-79.414391,43.661512
2,Church and Wellesley,0.774365,Downtown Toronto,-79.383801,43.665524
3,Parkdale,0.81841,West Toronto,-79.436897,43.640495
4,Golden Mile,0.820268,Scarborough,-79.287622,43.727841


### Part 9: Generate Map

In [507]:
# import libraries
import folium

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

In [508]:
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
longitude = location.longitude
latitude = location.latitude

In [509]:
# create map
ranked_map = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to the map
markers_colors = []
for lat, lon, hood, bur in zip(tor_df['Latitude'], tor_df['Longitude'], tor_df['Neighborhood'], tor_df['Borough']):
    label = folium.Popup(str(hood) + ", " + str(bur), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color='black',
        fill=True,
        fill_color='black',
        fill_opacity=0.7).add_to(ranked_map)

In [510]:
# add top 5 ranked
for lat, lon, hood, bor in zip(ranked_df['Latitude'], ranked_df['Longitude'], ranked_df['Neighborhood'], ranked_df['Borough']):
    label = folium.Popup(str(hood) + ", " + str(bor), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color='red',
        fill=True,
        fill_color='red',
        fill_opacity=0.7).add_to(ranked_map)

In [511]:
# initialize work location and radius marker
hood = input_df.loc[2,'Neighborhood']
bor = tor_df['Borough'].loc[tor_df['Neighborhood']==hood].tolist()
lon = input_df.loc[2,'Longitude']
lat = input_df.loc[2,'Latitude']
rad = input_grouped['Distance'].tolist()

In [512]:
label = folium.Popup('Future Work:' + str(hood) + ", " + str(bor), parse_html=True)
folium.CircleMarker(
    [lat, lon],
    radius=5,
    popup=label,
    color='blue',
    fill=True,
    fill_color='blue',
    fill_opacity=0.7).add_to(ranked_map)

<folium.features.CircleMarker at 0x7fc4ac59a208>

In [513]:
#initialize work radius
folium.CircleMarker(
    [lat, lon],
    radius=120, # dis to pixels, hardcoded
    color='green',
    fill=False).add_to(ranked_map)

<folium.features.CircleMarker at 0x7fc4ac601be0>

### Part 10: Visualize Results

In [514]:
ranked_map

#### Recommendations: Top 5 neighborhoo

In [515]:
ranked_df.head()

Unnamed: 0,Neighborhood,Dissimilarity,Borough,Longitude,Latitude
0,Yorkville,0.744075,Central Toronto,-79.390168,43.671386
1,Harbord,0.751415,Downtown Toronto,-79.414391,43.661512
2,Church and Wellesley,0.774365,Downtown Toronto,-79.383801,43.665524
3,Parkdale,0.81841,West Toronto,-79.436897,43.640495
4,Golden Mile,0.820268,Scarborough,-79.287622,43.727841
