# Toronto Neighborhood Analysis

### 1. Get Data from Wikipedia

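First, extract all the neighborhoods in Toronto so that every row has valid values in both Borough and Neighborhood: rows whose borough is 'Not assigned' are skipped, and a 'Not assigned' neighborhood is replaced by its borough name.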

In [2]:
import requests

# Wikipedia page that this notebook scrapes for the postcode / borough / neighborhood table.
WIKI_URL = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

# A timeout keeps the cell from hanging forever if the server never answers.
page = requests.get(WIKI_URL, timeout=30)
# Fail here on an HTTP error instead of handing an error page to the parser in the next cell.
page.raise_for_status()
page

<Response [200]>

Then parse the page using the BeautifulSoup library.

In [3]:
from bs4 import BeautifulSoup
soup = BeautifulSoup(page.content, 'html.parser')

tbody = soup.find('tbody')  # first <tbody> tag of the html content
tr_elements = tbody.find_all('tr')  # all <tr> tags inside of that <tbody>

# Maps postcode -> (postcode, borough, neighborhood). When a postcode appears in
# several rows, the neighborhood names are joined into one comma-separated string.
neighborhoods_data = {}
for tr_element in tr_elements[1:]:  # the first row holds the column names, not data
    # Strip every cell once so that comparisons and dictionary keys never carry
    # stray whitespace or the trailing newline of the last cell.
    cells = [td.get_text().strip() for td in tr_element.find_all('td')]
    if len(cells) < 3:
        continue  # not a (postcode, borough, neighborhood) row; skip instead of raising IndexError

    postcode, borough, neighborhood = cells[:3]
    if borough == 'Not assigned':
        continue  # rows without a borough are dropped
    if neighborhood == 'Not assigned':
        neighborhood = borough  # fall back to the borough name

    if postcode in neighborhoods_data:
        # Same postcode seen before: append this neighborhood to the stored names.
        neighborhood = neighborhoods_data[postcode][2] + ', ' + neighborhood
    neighborhoods_data[postcode] = (postcode, borough, neighborhood)

Represent the data as a pandas DataFrame.

In [4]:
import pandas as pd

# Column names, in the same order as the fields of each stored tuple.
labels = ['Postcode', 'Borough', 'Neighborhood']
neighborhood_records = list(neighborhoods_data.values())  # one tuple per postcode
df = pd.DataFrame.from_records(neighborhood_records, columns=labels)
df.shape  # (number of rows, number of columns)

(103, 3)

### 2. Get Geodata for neighborhoods

To get the latitude and longitude of each postcode, I use the CSV file `Geospatial_Coordinates.csv` for simplicity.

In [18]:
import csv

# Maps postcode -> (postcode, latitude, longitude). The values are kept exactly
# as read from the CSV, i.e. as strings.
location_mapping = {}
# newline='' is what the csv module documentation requires for files passed to csv.reader.
with open('Geospatial_Coordinates.csv', newline='') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    next(csv_reader, None)  # skip the header row (does nothing on an empty file)
    for row in csv_reader:
        location_mapping[row[0]] = (row[0], row[1], row[2])

# Extend every neighborhood tuple with the two coordinate fields of its postcode.
geo_neighborhoods_data = {}
for postcode, n_data in neighborhoods_data.items():
    geo_loc = location_mapping[postcode]  # a KeyError here means the CSV has no row for this postcode
    geo_neighborhoods_data[postcode] = n_data + (geo_loc[1], geo_loc[2])

labels_geo = ['Postcode', 'Borough', 'Neighborhood', 'Latitude', 'Longitude']
df_geo = pd.DataFrame.from_records(list(geo_neighborhoods_data.values()), columns=labels_geo)
df_geo.shape

(103, 5)

### 3. Explore and map the neighborhoods

In [20]:
#!conda install -c conda-forge geopy --yes # uncomment this line if geopy is not installed yet (e.g. you haven't completed the Foursquare API lab)
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

# NOTE(review): newer pandas releases expose this function as `pandas.json_normalize`
# and deprecate the `pandas.io.json` import path -- confirm against the installed version.
from pandas.io.json import json_normalize # transform JSON data into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# k-means implementation for the clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if folium is not installed yet (e.g. you haven't completed the Foursquare API lab)
import folium # map rendering library

# Printed only after every import above has succeeded.
print('Libraries imported.')

Libraries imported.


#### Create a map of Toronto with neighborhoods superimposed on top.

In [21]:
address = 'Toronto, ON'

# Resolve the address to coordinates with the Nominatim geocoder.
geolocator = Nominatim(user_agent="tn_explorer")
location = geolocator.geocode(address)
# geocode() returns None when nothing matches; fail with a clear message
# instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [23]:
# Build a folium map centred on the Toronto coordinates obtained from the geocoder.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# Draw one circle marker per postcode entry, with a "<neighborhood>, <borough>" popup.
for postcode, entry in geo_neighborhoods_data.items():
    borough, neighborhood = entry[1], entry[2]
    lat, lng = float(entry[3]), float(entry[4])  # convert the stored coordinate values to floats
    label = folium.Popup(f'{neighborhood}, {borough}', parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_toronto)

map_toronto

The following exploration covers only one neighborhood in Toronto. The analysis of the other neighborhoods is similar to the New York neighborhood analysis, so I don't include all of them here.

In [26]:
# Select the entry stored under postcode 'M5B'; it is a
# (postcode, borough, neighborhood, latitude, longitude) tuple.
neighborhood = geo_neighborhoods_data['M5B']
neighborhood

('M5B',
 'Downtown Toronto',
 'Ryerson, Garden District',
 '43.6571618',
 '-79.3789371')

In [27]:
# Read the coordinates from the selected `neighborhood` tuple
# (postcode, borough, neighborhood, latitude, longitude). The original cell read them
# from `downtown_toronto`, a name that is never defined, so it failed on a fresh kernel.
n_lat = float(neighborhood[3]) # latitude of the selected entry
n_lng = float(neighborhood[4]) # longitude of the selected entry
n_name = neighborhood[1] # index 1 is the borough name, used here as the display name

print(f'Latitude and longitude values of {n_name} are {n_lat}, {n_lng}.')

Latitude and longitude values of Downtown Toronto are 43.6571618, -79.3789371.


#### Now, let's get the top 30 venues that are in Downtown Toronto within a radius of 500 meters.

In [28]:
radius = 500 # search radius passed to the API as `radius`
LIMIT = 30 # maximum number of venues requested, passed as `limit`

# Placeholders, in order: client id, client secret, API version,
# latitude, longitude, radius, limit.
url_template = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'

# Full request URL, including the credentials; this is what the request cell uses.
url = url_template.format(
    CLIENT_ID,
    CLIENT_SECRET,
    VERSION,
    n_lat,
    n_lng,
    radius,
    LIMIT)

# Display the URL with the credentials masked so that the saved notebook
# output does not leak the API keys.
url_template.format(
    '<CLIENT_ID>',
    '<CLIENT_SECRET>',
    VERSION,
    n_lat,
    n_lng,
    radius,
    LIMIT)

'https://api.foursquare.com/v2/venues/explore?&client_id=<CLIENT_ID>&client_secret=<CLIENT_SECRET>&v=20180605&ll=43.6571618,-79.3789371&radius=500&limit=30'

In [29]:
# A timeout keeps the cell from hanging indefinitely on an unresponsive API.
response = requests.get(url, timeout=30)
# Surface HTTP errors here instead of failing later on a missing key in the payload.
response.raise_for_status()
results = response.json()
results

{'meta': {'code': 200, 'requestId': '5cab0b05f594df2406677700'},
 'response': {'headerLocation': 'Downtown Toronto',
  'headerFullLocation': 'Downtown Toronto, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 127,
  'suggestedBounds': {'ne': {'lat': 43.6616618045, 'lng': -79.37272880013879},
   'sw': {'lat': 43.652661795499995, 'lng': -79.38514539986122}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '57eda381498ebe0e6ef40972',
       'name': 'UNIQLO ユニクロ',
       'location': {'address': '220 Yonge St',
        'crossStreet': 'at Dundas St W',
        'lat': 43.65591027779457,
        'lng': -79.38064099181345,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.65591027779457,
          'lng': -79.38064099181345}],
        'di

In [30]:
# function that extracts the category of the venue
def get_category_type(row):
    """Return the name of the first category of a venue row.

    Parameters
    ----------
    row : mapping-like (e.g. a dict or a pandas Series)
        Must provide the category list under the key 'categories' or,
        failing that, under 'venue.categories'.

    Returns
    -------
    The 'name' value of the first category, or None when the category
    list is empty or None.

    Raises
    ------
    KeyError
        If neither 'categories' nor 'venue.categories' is present.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error propagates
        # instead of being swallowed by a bare except.
        categories_list = row['venue.categories']

    if not categories_list:
        return None
    return categories_list[0]['name']

Now we are ready to clean the json and structure it into a pandas dataframe.

In [31]:
# The venue records are the items of the first group in the response.
venues = results['response']['groups'][0]['items']

# Flatten the nested JSON and keep only the columns of interest.
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = json_normalize(venues).loc[:, filtered_columns]

# Replace each row's category list with the name of its first category.
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# Keep only the part of each column name after the last dot.
nearby_venues.columns = [name.rsplit(".", 1)[-1] for name in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,UNIQLO ユニクロ,Clothing Store,43.65591,-79.380641
1,Burrito Boyz,Burrito Place,43.656265,-79.378343
2,Blaze Pizza,Pizza Place,43.656518,-79.380015
3,Silver Snail Comics,Comic Shop,43.657031,-79.381403
4,Yonge-Dundas Square,Plaza,43.656054,-79.380495


And how many venues were returned by Foursquare?

In [32]:
# Each row of nearby_venues is one venue, so the row count is the number of venues.
print('{} venues were returned by Foursquare.'.format(len(nearby_venues)))

30 venues were returned by Foursquare.
