In [50]:
import numpy as np
import pandas as pd
import seaborn as sns
import folium
import requests
from sklearn.cluster import KMeans
from pandas.io.json import json_normalize
from geopy.geocoders import Nominatim
import matplotlib.pyplot as plt
import matplotlib.axes._axes as axes
sns.set()

In [39]:
df = pd.read_csv('toronto_df_longlat.csv')
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
3,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494
4,M9A,Queen's Park,Queen's Park,43.667856,-79.532242


In [40]:
# lets concentrate on boroughs that contain Toronto
toronto_df =df[df['Borough'].str.contains('Toronto')]

Use geopy library to get the latitude and longitutde of Toronto, city.

In [41]:
address = 'Toronto, Ontario, Canada'
geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

print(f'The geographical coordinates of Toronto are: lat:{latitude} and '
      f'long:{longitude}')

The geographical coordinates of Toronto are: lat:43.653963 and long:-79.387207


Create a map of Toronto with superimposed neighbourhoods

In [42]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for _, (postCode, borough, neighborhood, lat, lng) in toronto_df.iterrows():
    label = f'{neighborhood}, {borough}'
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        (lat, lng),
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)
map_toronto

Define Foursquare Credentials and Version

In [43]:
CLIENT_ID = 'MYP5LS452CUPIMYZU1XWFTI3BN3J2COWMU3UP5BADYU1J55G' # your Foursquare ID
CLIENT_SECRET = 'SDG1AUOYAOQ0QB0KDFUD0U21CLEMMVD4WRAAB5HJARXXPBRL' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: MYP5LS452CUPIMYZU1XWFTI3BN3J2COWMU3UP5BADYU1J55G
CLIENT_SECRET:SDG1AUOYAOQ0QB0KDFUD0U21CLEMMVD4WRAAB5HJARXXPBRL


Lets explore the first neighborhood in our dataframe

In [44]:
toronto_df.iloc[0]['Neighbourhood']

'Harbourfront'

Get the neighborhoods latitude and longitude

In [45]:
neighborhood_lat = toronto_df.iloc[0]['Latitude']
neighborhood_lng = toronto_df.iloc[0]['Longitude']
neighborhood_name = toronto_df.iloc[0]['Neighbourhood']

print(f'Latitude and Longitude of {neighborhood_name} are {neighborhood_lat}'
      f' and {neighborhood_lng}')

Latitude and Longitude of Harbourfront are 43.6542599 and -79.3606359


Now lets get the top 100 venues that are in Harbourfront within radius of 500m

In [46]:
radius = 500
LIMIT = 100
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_lat, 
    neighborhood_lng, 
    radius, 
    LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=MYP5LS452CUPIMYZU1XWFTI3BN3J2COWMU3UP5BADYU1J55G&client_secret=SDG1AUOYAOQ0QB0KDFUD0U21CLEMMVD4WRAAB5HJARXXPBRL&v=20180605&ll=43.6542599,-79.3606359&radius=500&limit=100'

In [47]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5e5234cdb57e88001bd15288'},
 'response': {'headerLocation': 'Corktown',
  'headerFullLocation': 'Corktown, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 46,
  'suggestedBounds': {'ne': {'lat': 43.6587599045, 'lng': -79.3544279001486},
   'sw': {'lat': 43.6497598955, 'lng': -79.36684389985142}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '54ea41ad498e9a11e9e13308',
       'name': 'Roselle Desserts',
       'location': {'address': '362 King St E',
        'crossStreet': 'Trinity St',
        'lat': 43.653446723052674,
        'lng': -79.3620167174383,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.653446723052674,
          'lng': -79.3620167174383}],
        'distance': 143,
       

In [48]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [51]:
# Clean the JSON response and convert to dataframe
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

  after removing the cwd from sys.path.


Unnamed: 0,name,categories,lat,lng
0,Roselle Desserts,Bakery,43.653447,-79.362017
1,Tandem Coffee,Coffee Shop,43.653559,-79.361809
2,Cooper Koo Family YMCA,Gym / Fitness Center,43.653191,-79.357947
3,Body Blitz Spa East,Spa,43.654735,-79.359874
4,Morning Glory Cafe,Breakfast Spot,43.653947,-79.361149


In [52]:
print(f'{nearby_venues.shape[0]} venues were returned by Foursquare')

46 venues were returned by Foursquare


Explore Neighborhoods in Toronto

In [None]:
# Lets create a function to repeat the same process to all the neighborhoods

In [54]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [56]:
# Now invoke the above function on each neighborhood
toronto_venues = getNearbyVenues(names=toronto_df['Neighbourhood'],
                                   latitudes=toronto_df['Latitude'],
                                   longitudes=toronto_df['Longitude']
                                  )

Harbourfront
Queen's Park
St. James Town
The Beaches
Berczy Park
Central Bay Street
Christie
Studio District
Lawrence Park
Roselawn
Davisville North
North Toronto West
Davisville
Rosedale
Stn A PO Boxes 25 The Esplanade
Church and Wellesley
Business Reply Mail Processing Centre 969 Eastern
Ryerson,Garden District
Adelaide,King,Richmond
Dovercourt Village,Dufferin
Harbourfront East,Toronto Islands,Union Station
Little Portugal,Trinity
The Danforth West,Riverdale
Design Exchange,Toronto Dominion Centre
Brockton,Exhibition Place,Parkdale Village
The Beaches West,India Bazaar
Commerce Court,Victoria Hotel
Forest Hill North,Forest Hill West
High Park,The Junction South
The Annex,North Midtown,Yorkville
Parkdale,Roncesvalles
Harbord,University of Toronto
Runnymede,Swansea
Moore Park,Summerhill East
Chinatown,Grange Park,Kensington Market
Deer Park,Forest Hill SE,Rathnelly,South Hill,Summerhill West
CN Tower,Bathurst Quay,Island airport,Harbourfront West,King and Spadina,Railway Lands,South N

Lets check the size of the resulting dataframe

In [57]:
print(toronto_venues.shape)
toronto_venues.head()

(1720, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Harbourfront,43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,Harbourfront,43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,Harbourfront,43.65426,-79.360636,Cooper Koo Family YMCA,43.653191,-79.357947,Gym / Fitness Center
3,Harbourfront,43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa
4,Harbourfront,43.65426,-79.360636,Morning Glory Cafe,43.653947,-79.361149,Breakfast Spot
