In [94]:
from bs4 import BeautifulSoup #Web Scrapping
import pandas as pd #library for dataframe
import numpy as np
from tabulate import tabulate
import json #library for json file
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import folium # map rendering library

In [95]:
# Import Toranto Neighbourhood wiki html file
import requests
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
response = requests.get(url)
response.text[:100]

'<!DOCTYPE html>\n<html class="client-nojs" lang="en" dir="ltr">\n<head>\n<meta charset="UTF-8"/>\n<title'

In [96]:
soup = BeautifulSoup(response.text, 'html.parser')
table = soup.find_all('table')[0] 
df = pd.read_html(str(table))[0]
df.head()


Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [97]:
print('Number of Borough Not assigned : {}'.format(df[df['Borough'] == 'Not assigned'].count()))

Number of Borough Not assigned : Postcode         77
Borough          77
Neighbourhood    77
dtype: int64


In [98]:
# Ignore cells with a borough that is Not assigned.
df = df[df['Borough'] != 'Not assigned']
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor


In [99]:
tornto = df.groupby(['Postcode','Borough']).agg(lambda x: ','.join(set(x)))
tornto = tornto.reset_index()
tornto.columns = ['Postal Code','Borough','Neighborhood']
tornto.head(10)

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Rouge Hill,Port Union,Highland Creek"
2,M1E,Scarborough,"West Hill,Guildwood,Morningside"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park,Ionview,Kennedy Park"
7,M1L,Scarborough,"Clairlea,Oakridge,Golden Mile"
8,M1M,Scarborough,"Scarborough Village West,Cliffcrest,Cliffside"
9,M1N,Scarborough,"Birch Cliff,Cliffside West"


In [100]:
# Read GeoFile
geo_data = pd.read_csv('https://cocl.us/Geospatial_data')
geo_data.head()
#geo_data.to_csv(r'Data\geodata.csv')

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [101]:
tornto = pd.merge(tornto,geo_data,how='left',on='Postal Code')
tornto.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill,Port Union,Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"West Hill,Guildwood,Morningside",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


# Use geopy library to get the latitude and longitude values of Toronto City. 

In [102]:
address = 'Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto City are 43.653963, -79.387207.


In [103]:
map_newyork = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(tornto['Latitude'], tornto['Longitude'],tornto['Borough'], tornto['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_newyork)  
    
map_newyork

## Import _Foursquare_ Credential

In [104]:
CLIENT_ID = 'UBLX5C1RQM4EQJIT20SYTFY2YAMDC51IIGFTHGEHKXR2T44B' # Foursquare ID
CLIENT_SECRET = 'EQ5U14PRWPDADRFB5KEJY3CG4VBDABSWKWZQHTHSU5YPHTWO' #Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: UBLX5C1RQM4EQJIT20SYTFY2YAMDC51IIGFTHGEHKXR2T44B
CLIENT_SECRET:EQ5U14PRWPDADRFB5KEJY3CG4VBDABSWKWZQHTHSU5YPHTWO


# Foursquare
### Function that extracts the category of the venue

In [158]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']


#Extract Venues from FourSquare as lat and long given as input
def foursquareVenue(neighborhood_name, neighborhood_latitude, neighborhood_longitude,redius=500,limit=100):
    
    """Extract Venues from FourSquare as lat and long given as input"""
    url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
     CLIENT_ID, CLIENT_SECRET, VERSION, neighborhood_latitude, neighborhood_longitude, radius, LIMIT)
    
    print("Neighborhood Name: {}".format(neighborhood_name))

    #Get Url resopnse from FourSquare
    from pandas.io.json import json_normalize

    response = json_normalize(requests.get(url).json()["response"]['groups'][0]['items']) #Save as json file as normalized datafarme
    # clean columns
    columns_name = ['vanue','vanue_category','venue_latitude','venue_longitude']
    if response.empty:
        venues = pd.DataFrame(np.nan,index=[0],columns= columns_name)
    else:
    # filter columns
        venues = response.loc[:, ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']]

        # filter the category for each row
        # use function get_category_type() apply row wise in category object
        venues['venue.categories'] = venues.apply(get_category_type, axis=1)
        # clean columns
        venues.columns = columns_name
    #Add Neighborhood Details
    venues['neighborhood_name'], venues['neighborhood_latitude'], venues['neighborhood_longitude'] = neighborhood_name, neighborhood_latitude, neighborhood_longitude

    return venues


In [122]:

neighborhood_latitude = tornto.loc[16, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = tornto.loc[16, 'Longitude'] # neighborhood longitude value

neighborhood_name = tornto.loc[1, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of **{}** are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of **Rouge Hill,Port Union,Highland Creek** are 43.836124700000006, -79.20563609999999.


In [136]:
def foursquare(neighborhood_name, neighborhood_latitude, neighborhood_longitude,redius=500,limit=100):
    
    """Extract Venues from FourSquare as lat and long given as input"""
    url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
     CLIENT_ID, CLIENT_SECRET, VERSION, neighborhood_latitude, neighborhood_longitude, radius, LIMIT)
    response = json_normalize(requests.get(url).json()["response"]['groups'][0]['items'])
    return response
foursquare('Upper Rouge',  43.836125, -79.205636)

In [161]:
A = foursquareVenue('Upper Rouge',  43.836125, -79.205636)
B = foursquareVenue(neighborhood_name, neighborhood_latitude, neighborhood_longitude)
pd.concat([A,B],axis=0)

Neighborhood Name: Upper Rouge
Neighborhood Name: Northwest


Unnamed: 0,vanue,vanue_category,venue_latitude,venue_longitude,neighborhood_name,neighborhood_latitude,neighborhood_longitude
0,,,,,Upper Rouge,43.836125,-79.205636
0,Economy Rent A Car,Rental Car Location,43.708471,-79.589943,Northwest,43.706748,-79.594054
1,Logistics Distribution,Bar,43.707554,-79.589252,Northwest,43.706748,-79.594054
2,Saand Rexdale,Drugstore,43.705072,-79.598725,Northwest,43.706748,-79.594054


In [164]:
column_names = foursquareVenue(neighborhood_name,  neighborhood_latitude, neighborhood_longitude).columns
df = pd.DataFrame(columns = column_names)

Neighborhood Name: Northwest


In [165]:
df.head()

Unnamed: 0,vanue,vanue_category,venue_latitude,venue_longitude,neighborhood_name,neighborhood_latitude,neighborhood_longitude


In [166]:
for neighborhood_name, neighborhood_latitude, neighborhood_longitude in zip(tornto['Neighborhood'],tornto['Latitude'],tornto['Longitude']):
    x = foursquareVenue(neighborhood_name,  neighborhood_latitude, neighborhood_longitude)
    df = pd.concat([df,x],axis=0)

Neighborhood Name: Rouge,Malvern
Neighborhood Name: Rouge Hill,Port Union,Highland Creek
Neighborhood Name: West Hill,Guildwood,Morningside
Neighborhood Name: Woburn
Neighborhood Name: Cedarbrae
Neighborhood Name: Scarborough Village
Neighborhood Name: East Birchmount Park,Ionview,Kennedy Park
Neighborhood Name: Clairlea,Oakridge,Golden Mile
Neighborhood Name: Scarborough Village West,Cliffcrest,Cliffside
Neighborhood Name: Birch Cliff,Cliffside West
Neighborhood Name: Wexford Heights,Dorset Park,Scarborough Town Centre
Neighborhood Name: Maryvale,Wexford
Neighborhood Name: Agincourt
Neighborhood Name: Tam O'Shanter,Sullivan,Clarks Corners
Neighborhood Name: Steeles East,Milliken,L'Amoreaux East,Agincourt North
Neighborhood Name: L'Amoreaux West
Neighborhood Name: Upper Rouge
Neighborhood Name: Hillcrest Village
Neighborhood Name: Henry Farm,Fairview,Oriole
Neighborhood Name: Bayview Village
Neighborhood Name: Silver Hills,York Mills
Neighborhood Name: Willowdale,Newtonbrook
Neighborho

In [167]:
df.head()

Unnamed: 0,vanue,vanue_category,venue_latitude,venue_longitude,neighborhood_name,neighborhood_latitude,neighborhood_longitude
0,Wendy's,Fast Food Restaurant,43.807448,-79.199056,"Rouge,Malvern",43.806686,-79.194353
1,Interprovincial Group,Print Shop,43.80563,-79.200378,"Rouge,Malvern",43.806686,-79.194353
0,Royal Canadian Legion,Bar,43.782533,-79.163085,"Rouge Hill,Port Union,Highland Creek",43.784535,-79.160497
0,G & G Electronics,Electronics Store,43.765309,-79.191537,"West Hill,Guildwood,Morningside",43.763573,-79.188711
1,Marina Spa,Spa,43.766,-79.191,"West Hill,Guildwood,Morningside",43.763573,-79.188711
