In [1]:
# Standard library
import json # library to handle JSON files
import os # read API credentials from the environment
import warnings # report missing configuration without aborting the cell
from io import StringIO # wrap literal HTML before handing it to pd.read_html

# Third-party
import numpy as np # library to handle data in a vectorized manner
import pandas as pd # library for data analysis
import requests # library to handle requests
from pandas.io.json import json_normalize # transform JSON file into a pandas dataframe

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import geocoder

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

from bs4 import BeautifulSoup
import html5lib

# Display options: show every column and every row when rendering frames
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

print('Libraries imported.')

Libraries imported.


## Scraping the web page:

In [2]:
# Download the Wikipedia page that lists the postal codes starting with "M".
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
# A timeout keeps the cell from waiting forever on a stalled connection.
results = requests.get(url, timeout=30)
# Stop here on an HTTP error instead of parsing an error page as if it were the article.
results.raise_for_status()
soup = BeautifulSoup(results.content, 'html.parser')

In [3]:
# Take the first <table> element of the parsed page.
table_html = soup.find('table')
if table_html is None:
    # find() returns None when nothing matches; fail with a readable message
    # instead of letting read_html try to parse the string 'None'.
    raise ValueError('No <table> element found in the downloaded page')

# Newer pandas releases deprecate passing literal HTML to read_html;
# a file-like wrapper is accepted by old and new versions alike.
table_pd = pd.read_html(StringIO(str(table_html)), flavor = 'html5lib')[0]
table_pd.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


## Preparing the Dataframe:

In [4]:
# Drop the rows whose borough is 'Not assigned', then order the remaining
# rows by postal code with a fresh 0..n-1 index.
table_pd = (
    table_pd
    .loc[table_pd['Borough'] != 'Not assigned']
    .reset_index(drop=True)
    .sort_values(by='Postal Code', axis=0, ascending=True, ignore_index=True)
)
table_pd.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [10]:
# Display the (rows, columns) tuple of the postal-code table.
table_pd.shape

(103, 3)

In [5]:
# Load the coordinate lookup from a CSV in the working directory; its
# 'Latitude' and 'Longitude' columns are attached to table_pd in the next cell.
postal_code_pd = pd.read_csv('Geospatial_Coordinates.csv')

In [6]:
# Attach coordinates by matching on the postal code itself. A positional
# (index-to-index) join only gives correct results while both frames happen
# to be in exactly the same row order, and it fails silently otherwise.
# NOTE(review): assumes Geospatial_Coordinates.csv has a 'Postal Code' column
# alongside 'Latitude' and 'Longitude' — confirm against the file.
table_pd = (
    table_pd
    # Dropping any previous coordinates makes the cell safe to re-run.
    .drop(columns=['Latitude', 'Longitude'], errors='ignore')
    .merge(
        postal_code_pd[['Postal Code', 'Latitude', 'Longitude']],
        on='Postal Code',
        how='left',              # keep every row of table_pd, in its current order
        validate='many_to_one',  # raise if the lookup lists a postal code twice
    )
)

In [27]:
# Preview the first rows, now including the Latitude and Longitude columns.
table_pd.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


#### Plotting map of Toronto with labels:

In [7]:
# Look up the coordinates used to centre the map.
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
if location is None:
    # geocode() returns None when the service has no match; without this check
    # the attribute access below fails with an unhelpful AttributeError.
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude_toronto = location.latitude
longitude_toronto = location.longitude

In [40]:
# Base map centred on the geocoded coordinates.
map_toronto = folium.Map(
    location=[latitude_toronto, longitude_toronto],
    zoom_start=10,
)

# One circle marker per row of table_pd; the popup reads "<postal code>, <borough>".
for lat, lon, code, borough in zip(
    *(table_pd[column] for column in ('Latitude', 'Longitude', 'Postal Code', 'Borough'))
):
    label = folium.Popup('{}, {}'.format(code, borough), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    ).add_to(map_toronto)

# map_toronto  (uncomment to render the map inline)

## Exploring and Clustering

### Setting up the Foursquare API:

In [9]:
# Credentials come from the environment so that secrets are never stored in
# (or committed with) the notebook. Export FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET before starting the kernel.
# NOTE(review): any key that was ever saved in a shared copy of this notebook
# should be regenerated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

if not (CLIENT_ID and CLIENT_SECRET):
    # Warn rather than raise so the remaining non-API cells can still be run.
    warnings.warn(
        'FOURSQUARE_CLIENT_ID and/or FOURSQUARE_CLIENT_SECRET is not set; '
        'the Foursquare API cells need both values.'
    )

### Getting the top 100 venues within a radius of 500 meters of a sample Toronto location (43.7116948, -79.4169356).

In [10]:
# Query settings shared by the venue requests.
LIMIT = 100    # value sent as the `limit` query parameter
radius = 500   # value sent as the `radius` query parameter

# Centre point of the sample query.
neighborhood_latitude = 43.7116948
neighborhood_longitude = -79.4169356

# Assemble the venues/explore request URL from the settings above.
url = (
    'https://api.foursquare.com/v2/venues/explore'
    '?client_id={}&client_secret={}'
    '&ll={},{}'
    '&v={}&radius={}&limit={}'
).format(
    CLIENT_ID,
    CLIENT_SECRET,
    neighborhood_latitude,
    neighborhood_longitude,
    VERSION,
    radius,
    LIMIT,
)


In [11]:
# Bound the wait and surface HTTP errors (bad credentials, quota, ...) here,
# rather than as a KeyError when the JSON payload is indexed later.
response = requests.get(url, timeout=30)
response.raise_for_status()
results = response.json()

In [12]:
# The venue records sit under response -> groups[0] -> items in the payload.
venues = results['response']['groups'][0]['items']
# pd.json_normalize is the public entry point (pandas >= 1.0); the
# pandas.io.json alias is deprecated. Nested keys become dotted column names.
nearby_venues = pd.json_normalize(venues)
nearby_venues.head()

Unnamed: 0,referralId,reasons.count,reasons.items,venue.id,venue.name,venue.location.lat,venue.location.lng,venue.location.labeledLatLngs,venue.location.distance,venue.location.cc,venue.location.city,venue.location.state,venue.location.country,venue.location.formattedAddress,venue.categories,venue.photos.count,venue.photos.groups,venue.location.address,venue.location.postalCode
0,e-0-4e6e176c45dd293273b74e3c-0,0,"[{'summary': 'This spot is popular', 'type': '...",4e6e176c45dd293273b74e3c,Rosalind's Garden Oasis,43.712189,-79.411978,"[{'label': 'display', 'lat': 43.71218888050602...",402,CA,Toronto,ON,Canada,"[Toronto ON, Canada]","[{'id': '4bf58dd8d48988d15a941735', 'name': 'G...",0,[],,
1,e-0-5e85459eb00ff700074aea96-1,0,"[{'summary': 'This spot is popular', 'type': '...",5e85459eb00ff700074aea96,THA Home Additions Toronto,43.708369,-79.420434,"[{'label': 'display', 'lat': 43.70836906038992...",465,CA,Toronto,ON,Canada,"[485 Eglington Ave E #102 D, Toronto ON M4P 1N...","[{'id': '545419b1498ea6ccd0202f58', 'name': 'H...",0,[],485 Eglington Ave E #102 D,M4P 1N2


In [13]:
# Columns kept for every venue: name, raw category list and coordinates.
filtered_columns = [
    'venue.name',
    'venue.categories',
    'venue.location.lat',
    'venue.location.lng',
]
nearby_venues = nearby_venues.loc[:, filtered_columns]

In [73]:
# nearby_venues.loc[1,'venue.categories'][0]

In [14]:
def get_category_type(row):
    """Return the name of the first category listed for a venue row.

    Parameters
    ----------
    row : mapping-like (e.g. a dict or a pandas Series)
        Must provide the category list under the key 'categories' or,
        failing that, 'venue.categories'.

    Returns
    -------
    str or None
        The 'name' entry of the first category, or None when the category
        value is empty or is not a list (for example None or NaN).

    Raises
    ------
    KeyError
        If neither 'categories' nor 'venue.categories' is present.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; other errors propagate
        # instead of being swallowed by a bare except.
        categories_list = row['venue.categories']

    # A missing value arrives as None/NaN rather than a list, and len() would
    # raise on it, so treat anything that is not a non-empty sequence as "no category".
    if not isinstance(categories_list, (list, tuple)) or len(categories_list) == 0:
        return None
    return categories_list[0]['name']

In [15]:
# Replace each row's raw category list with the name of its first category.
nearby_venues['venue.categories'] = nearby_venues.apply(
    get_category_type,
    axis='columns',
)

In [16]:
# Keep only the last dotted segment of every column name and title-case it
# (e.g. 'venue.location.lat' -> 'Lat').
nearby_venues.columns = [
    col_name.rsplit('.', 1)[-1].title()
    for col_name in nearby_venues.columns
]

In [17]:
# Preview the tidied sample: Name, Categories, Lat, Lng.
nearby_venues.head()

Unnamed: 0,Name,Categories,Lat,Lng
0,Rosalind's Garden Oasis,Garden,43.712189,-79.411978
1,THA Home Additions Toronto,Home Service,43.708369,-79.420434


In [24]:
import time

In [46]:
# Query the venues/explore endpoint once per postal code and gather the results.
# Frames are collected in a list and concatenated once at the end:
# DataFrame.append inside a loop re-copies everything accumulated so far on
# every iteration and no longer exists in pandas 2.0.
venue_frames = []
for postal_code, borough, borough_lat, borough_lng in zip(table_pd['Postal Code'], table_pd['Borough'], table_pd['Latitude'], table_pd['Longitude']):
    print('{},{}'.format(postal_code,borough))
    url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, borough_lat, borough_lng, VERSION, radius, LIMIT)

    # Bound the wait and stop on an HTTP error instead of hitting a KeyError
    # on an error payload further down.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    nearby_venues = pd.json_normalize(response.json()['response']['groups'][0]['items'])

    # An empty item list normalizes to a frame without columns; skip it.
    if nearby_venues.empty:
        continue

    # .copy() yields an independent frame before columns are overwritten/added.
    nearby_venues = nearby_venues[filtered_columns].copy()
    nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)
    nearby_venues.columns = map(str.title,[col_name.split('.')[-1] for col_name in nearby_venues.columns])
    # Tag every venue with the postal code it was found for.
    nearby_venues['Postal Code'] = postal_code
    nearby_venues['Borough'] = borough
    nearby_venues['Postal Code latitude'] = borough_lat
    nearby_venues['Postal Code longitude'] = borough_lng
    venue_frames.append(nearby_venues)

# Each frame keeps its own 0..k index, exactly as repeated append() produced.
borough_venue = pd.concat(venue_frames) if venue_frames else pd.DataFrame()

M1B,Scarborough
M1C,Scarborough
M1E,Scarborough
M1G,Scarborough
M1H,Scarborough
M1J,Scarborough
M1K,Scarborough
M1L,Scarborough
M1M,Scarborough
M1N,Scarborough
M1P,Scarborough
M1R,Scarborough
M1S,Scarborough
M1T,Scarborough
M1V,Scarborough
M1W,Scarborough
M1X,Scarborough
M2H,North York
M2J,North York
M2K,North York
M2L,North York
M2M,North York
M2N,North York
M2P,North York
M2R,North York
M3A,North York
M3B,North York
M3C,North York
M3H,North York
M3J,North York
M3K,North York
M3L,North York
M3M,North York
M3N,North York
M4A,North York
M4B,East York
M4C,East York
M4E,East Toronto
M4G,East York
M4H,East York
M4J,East York
M4K,East Toronto
M4L,East Toronto
M4M,East Toronto
M4N,Central Toronto
M4P,Central Toronto
M4R,Central Toronto
M4S,Central Toronto
M4T,Central Toronto
M4V,Central Toronto
M4W,Downtown Toronto
M4X,Downtown Toronto
M4Y,Downtown Toronto
M5A,Downtown Toronto
M5B,Downtown Toronto
M5C,Downtown Toronto
M5E,Downtown Toronto
M5G,Downtown Toronto
M5H,Downtown Toronto
M5J,Downtow

In [48]:
 # Put the postal-code context first, followed by the venue details.
 borough_venue = borough_venue.loc[:, [
     'Borough',
     'Postal Code',
     'Postal Code latitude',
     'Postal Code longitude',
     'Name',
     'Categories',
     'Lat',
     'Lng',
 ]]
 borough_venue.head()

Unnamed: 0,Borough,Postal Code,Postal Code latitude,Postal Code longitude,Name,Categories,Lat,Lng
0,Scarborough,M1B,43.806686,-79.194353,Wendy’s,Fast Food Restaurant,43.807448,-79.199056
0,Scarborough,M1C,43.784535,-79.160497,RIGHT WAY TO GOLF,Golf Course,43.785177,-79.161108
1,Scarborough,M1C,43.784535,-79.160497,Royal Canadian Legion,Bar,43.782533,-79.163085
0,Scarborough,M1E,43.763573,-79.188711,RBC Royal Bank,Bank,43.76679,-79.191151
1,Scarborough,M1E,43.763573,-79.188711,G & G Electronics,Electronics Store,43.765309,-79.191537


In [52]:
# Only the last expression of a cell is rendered, so the shape below is
# evaluated but not shown; the cell output is the array of distinct boroughs.
borough_venue.shape
pd.unique(borough_venue['Borough'])

array(['Scarborough', 'North York', 'East York', 'East Toronto',
       'Central Toronto', 'Downtown Toronto', 'York', 'West Toronto',
       'Mississauga', 'Etobicoke'], dtype=object)

In [76]:
# Number of non-null entries per column for each borough.
borough_venue.groupby('Borough').count()

Unnamed: 0_level_0,Postal Code,Postal Code latitude,Postal Code longitude,Name,Categories,Lat,Lng
Borough,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Central Toronto,115,115,115,115,115,115,115
Downtown Toronto,1242,1242,1242,1242,1242,1242,1242
East Toronto,123,123,123,123,123,123,123
East York,74,74,74,74,74,74,74
Etobicoke,73,73,73,73,73,73,73
Mississauga,13,13,13,13,13,13,13
North York,244,244,244,244,244,244,244
Scarborough,93,93,93,93,93,93,93
West Toronto,159,159,159,159,159,159,159
York,16,16,16,16,16,16,16


In [60]:
# Rows whose borough is exactly 'York' (an equality test, not a substring match).
borough_toronto_venu = borough_venue[borough_venue['Borough'].eq('York')]
borough_toronto_venu.head()

Unnamed: 0,Borough,Postal Code,Postal Code latitude,Postal Code longitude,Name,Categories,Lat,Lng
0,York,M6C,43.693781,-79.428191,Cedarvale Park,Field,43.692535,-79.428705
1,York,M6C,43.693781,-79.428191,Cedarvale Ravine,Trail,43.690188,-79.426106
2,York,M6C,43.693781,-79.428191,Phil White Arena,Hockey Arena,43.691303,-79.431761
0,York,M6E,43.689026,-79.453512,Nairn Park,Park,43.690654,-79.4563
1,York,M6E,43.689026,-79.453512,Maximum Woman,Women's Store,43.690651,-79.456333


In [78]:
# Keep the rows whose borough name contains the substring 'Toronto',
# then count the non-null entries per column for each remaining borough.
borough_toronto_venue = borough_venue.loc[
    ['Toronto' in borough_name for borough_name in borough_venue['Borough']]
]
borough_toronto_venue.groupby('Borough').count()

Unnamed: 0_level_0,Postal Code,Postal Code latitude,Postal Code longitude,Name,Categories,Lat,Lng
Borough,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Central Toronto,115,115,115,115,115,115,115
Downtown Toronto,1242,1242,1242,1242,1242,1242,1242
East Toronto,123,123,123,123,123,123,123
West Toronto,159,159,159,159,159,159,159
