In [96]:
import os # library to read credentials from environment variables
import random # library for random number generation

from bs4 import BeautifulSoup
import numpy as np # library to handle data in a vectorized manner
import pandas as pd # library for data analysis
import requests # library to handle requests

# Lift pandas' display truncation: None means "no limit", so every column and
# every row is rendered when a DataFrame is shown.
pd.options.display.max_columns = None
pd.options.display.max_rows = None

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library


* Define a variable for URL 
* Print the response to confirm that the URL is valid (HTTP status 200)

In [97]:
# Wikipedia article that lists the Canadian postal codes starting with "M"
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# A timeout keeps the cell from hanging indefinitely if the server never answers
results = requests.get(url, timeout=30)

# Stop here with the HTTP status in the error message instead of parsing an
# error page further down the notebook
results.raise_for_status()
print(results)

<Response [200]>


 * Get the HTML page contents
 * Convert the page to BeautifulSoup for parsing

In [98]:
 #get the HTML page contents
ca_postal_code_page = results.text  # decoded body of the HTTP response

# Convert the page to a BeautifulSoup tree, parsed with the lxml backend
html_soup_page = BeautifulSoup(markup=ca_postal_code_page, features='lxml')


* Get the HTML Postal Codes table data
* Assign table rows to a separate variable

In [99]:
# Get the HTML Postal Codes table data
table_detail = html_soup_page.find('table', class_='wikitable sortable')
if table_detail is None:
    # find() returns None when no element matches; fail with a message that
    # names the missing table instead of an AttributeError on the next line
    raise ValueError("No table with class 'wikitable sortable' was found on the page")

# Every <tr> of the table, header row included
table_rows = table_detail.find_all('tr')

* Loop through each row and skip the rows whose Borough is "Not assigned"
* Where the Neighbourhood is "Not assigned", use the Borough name as the Neighbourhood

In [100]:
a = 'Not assigned'  # placeholder text the table uses for unallocated values
res = []
for tr in table_rows:
    # Keep every <td>, including empty ones, so a blank cell can never shift
    # the remaining values into the wrong column
    cells = [cell.text.strip() for cell in tr.find_all('td')]

    # The header row has no <td> cells; anything that is not exactly
    # (postal code, borough, neighbourhood) is skipped rather than indexed
    if len(cells) != 3:
        continue

    postal_code, borough, neighbourhood = cells

    # Skip rows without a usable postal code or borough
    if not postal_code or not borough or borough == a:
        continue

    # A missing / "Not assigned" neighbourhood takes the borough name
    if not neighbourhood or neighbourhood == a:
        neighbourhood = borough

    res.append([postal_code, borough, neighbourhood])

df = pd.DataFrame(res, columns=['PostalCode','Borough','Neighbourhood'])
df.shape


(211, 3)

* Combine the rows by Postal Code and display all Neighbourhoods under that postal code in one row

In [102]:
# Title of the scraped page
page_header = html_soup_page.title.text

# Collapse the rows of each (PostalCode, Borough) pair into a single row whose
# Neighbourhood is the comma-separated list of all neighbourhoods in that pair
neighbourhoods_by_code = df.groupby(['PostalCode','Borough'])['Neighbourhood']
df_combine = neighbourhoods_by_code.agg(lambda names: ', '.join(map(str, names))).reset_index()


* Get Coordinates data from CSV file and join the two dataframes on Postal Code
* Note: I was not able to get the geographical coordinates of the neighbourhoods using the Geocoder package, so I used the CSV file instead.

In [103]:
# CSV with one latitude/longitude pair per postal code
file_url = 'http://cocl.us/Geospatial_data'

# header=0 consumes the file's own header row; names= supplies the column
# labels used in this notebook
df_cords = pd.read_csv(file_url, header=0, names=['Code','Latitude','Longitude'])

# Join on the postal code and drop the duplicated key column
df_combine.merge(df_cords, left_on="PostalCode", right_on="Code").drop('Code', axis=1)


Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


* Get the coordinates of Toronto city

In [104]:
# Postal codes joined with their coordinates; this is the frame used by the maps below
neighborhoods = pd.merge(df_combine, df_cords, left_on="PostalCode", right_on="Code").drop('Code', axis=1)
address = 'Toronto, Canada'

geolocator = Nominatim(user_agent="to_explorer")
location = geolocator.geocode(address)
if location is None:
    # geocode() returns None when the service finds no match; report that
    # explicitly instead of failing on the attribute access below
    raise ValueError('Could not geocode address: {}'.format(address))

latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto City are 43.653963, -79.387207.


* Loop through Toronto city Borough Coordinates and display city map

In [105]:
def add_neighbourhood_markers(target_map, places):
    """Add one blue circle marker per row of ``places`` to ``target_map``.

    Parameters
    ----------
    target_map : folium.Map
        Map that receives the markers.
    places : pandas.DataFrame
        Frame with 'Latitude', 'Longitude', 'Borough' and 'Neighbourhood'
        columns; each row becomes one marker whose popup reads
        "<Neighbourhood>, <Borough>".

    Returns
    -------
    folium.Map
        ``target_map`` itself, so the call can end a cell and render the map.
    """
    for lat, lng, borough, neighborhood in zip(places['Latitude'], places['Longitude'],
                                               places['Borough'], places['Neighbourhood']):
        # parse_html=True makes the popup treat the label as text, not markup
        label = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
        folium.CircleMarker(
            [lat, lng],
            radius=5,
            popup=label,
            color='blue',
            fill=True,
            fill_color='#3186cc',
            fill_opacity=0.7).add_to(target_map)
    return target_map


# create map of Toronto using latitude and longitude values
map_to = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
add_neighbourhood_markers(map_to, neighborhoods)

map_to


* We will cluster all the Boroughs whose name contains "Toronto"

In [106]:
# Boolean mask: True for rows whose Borough contains the text 'Toronto'
is_toronto_borough = neighborhoods['Borough'].str.contains('Toronto')

# Keep the matching rows and renumber them from 0
to_downtown_neighborhoods = neighborhoods.loc[is_toronto_borough].reset_index(drop=True)
to_downtown_neighborhoods.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


* Display these Boroughs as a cluster on the map

In [107]:
# Display the selected boroughs on a fresh map centred on Toronto
map_to = folium.Map(location=[latitude, longitude], zoom_start=10)

# One circle marker per row; only the four columns the markers need are iterated
marker_rows = to_downtown_neighborhoods[['Latitude', 'Longitude', 'Borough', 'Neighbourhood']]
for row_lat, row_lng, row_borough, row_name in marker_rows.itertuples(index=False, name=None):
    popup_text = '{}, {}'.format(row_name, row_borough)
    marker = folium.CircleMarker(
        [row_lat, row_lng],
        radius=5,
        popup=folium.Popup(popup_text, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(map_to)

map_to

* Use Credentials to access Foursquare

In [109]:
# Foursquare credentials are read from the environment so that they are never
# saved inside the notebook. Set FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET before starting the kernel.
# NOTE: a key pair that has ever been committed in a notebook should be
# treated as compromised and regenerated in the Foursquare developer console.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
VERSION = '20190809' # Foursquare API version

if not (CLIENT_ID and CLIENT_SECRET):
    # Warn right away; the API calls further down cannot succeed without both values
    print('WARNING: FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
          'Foursquare requests will be rejected.')



* Explore the Borough in location 1

In [110]:
# Label-based scalar lookup: the Neighbourhood value stored under index label 1
to_downtown_neighborhoods.at[1, 'Neighbourhood']

'The Danforth West, Riverdale'

* Get the Latitude and Longitude coordinates of that location

In [111]:
# Coordinates and name are all read from row 1 of the same frame, so the
# printed name always belongs to the printed coordinates
neighborhood_latitude = to_downtown_neighborhoods.loc[1, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = to_downtown_neighborhoods.loc[1, 'Longitude'] # neighborhood longitude value

neighborhood_name = to_downtown_neighborhoods.loc[1, 'Neighbourhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of The Danforth West, Riverdale are 43.6795571, -79.352188.


* Limit the number of venues returned and search within a radius of 500 meters

In [112]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius

# Same template for the real request and for the redacted copy shown below
explore_template = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'
request_values = (VERSION, neighborhood_latitude, neighborhood_longitude, radius, LIMIT)

# Full URL, including the credentials, used for the actual API call
url = explore_template.format(CLIENT_ID, CLIENT_SECRET, *request_values)

# Display the request with the credentials masked so that the secret is never
# written into the notebook's saved output
explore_template.format('<CLIENT_ID>', '<CLIENT_SECRET>', *request_values)

'https://api.foursquare.com/v2/venues/explore?&client_id=<CLIENT_ID>&client_secret=<CLIENT_SECRET>&v=20190809&ll=43.6795571,-79.352188&radius=500&limit=100'

In [113]:
# A timeout keeps the cell from hanging indefinitely if the API never answers
response = requests.get(url, timeout=30)

# Surface HTTP errors (bad credentials, exceeded quota, ...) here instead of
# failing later with a KeyError on the expected JSON structure
response.raise_for_status()

results = response.json()
results

{'meta': {'code': 200, 'requestId': '5d4dc8ccd69ed0002d0a2eac'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Greektown',
  'headerFullLocation': 'Greektown, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 41,
  'suggestedBounds': {'ne': {'lat': 43.6840571045, 'lng': -79.34597738331301},
   'sw': {'lat': 43.675057095499994, 'lng': -79.35839861668698}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4bce4183ef10952197da8386',
       'name': 'Pantheon',
       'location': {'address': '407 Danforth Ave.',
        'crossStreet': 'at Chester Ave.',
        'lat': 43.67762124481265,
        'lng': -79.35143390043564,
        'labeledLatLngs': [{'label': 'di

* Extract venue category

In [114]:
# function that extracts the category of the venue
def get_category_type(row):
    """Return the name of the first category of a venue record.

    Parameters
    ----------
    row : mapping (dict or pandas.Series)
        Venue record holding its categories under the key 'categories' or,
        failing that, 'venue.categories'. The value is expected to be a list
        of dicts that each have a 'name' entry.

    Returns
    -------
    str or None
        The 'name' of the first category, or None when the category value is
        empty or is not a list (for example a missing value).

    Raises
    ------
    KeyError
        If the record has neither 'categories' nor 'venue.categories'.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error propagates
        categories_list = row['venue.categories']

    # Missing values (None / NaN) have no length or items to index into
    if not isinstance(categories_list, (list, tuple)) or len(categories_list) == 0:
        return None
    return categories_list[0]['name']

* Get venues and display first 5 using Head function

In [116]:
# Recommended venues of the first result group
venues = results['response']['groups'][0]['items']

# Columns kept from the flattened JSON
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
venues_flat = json_normalize(venues)[filtered_columns]

# Replace each row's category list by the name of its first category
category_names = venues_flat.apply(get_category_type, axis=1)
nearby_venues = venues_flat.assign(**{'venue.categories': category_names})

# Keep only the last dotted component of every column name
nearby_venues = nearby_venues.rename(columns=lambda col: col.split(".")[-1])

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Pantheon,Greek Restaurant,43.677621,-79.351434
1,Dolce Gelato,Ice Cream Shop,43.677773,-79.351187
2,MenEssentials,Cosmetics Shop,43.67782,-79.351265
3,Cafe Fiorentina,Italian Restaurant,43.677743,-79.350115
4,La Diperie,Ice Cream Shop,43.67753,-79.352295


* Print total venues found in that area

In [118]:
# Number of rows in the venues frame, i.e. venues returned for this location
venue_count = len(nearby_venues)
print('{} venues were returned by Foursquare.'.format(venue_count))

41 venues were returned by Foursquare.
