# Explore and Cluster the Neighborhoods in Toronto

Let's import the required packages.


In [1]:
import json
import os

import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
import requests
from geopy.geocoders import Nominatim
from pandas.io.json import json_normalize

Let's download and explore the dataset.


In [2]:
# Read the geodata CSV and discard its 'Postcode' column in a single expression,
# so df_geo is bound exactly once to the trimmed frame.
df_geo = pd.read_csv('tor_geo.csv').drop(columns=['Postcode'])

In [3]:
# Preview the first ten rows of df_geo.
df_geo.head(10)

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude
0,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,Scarborough,Woburn,43.770992,-79.216917
4,Scarborough,Cedarbrae,43.773136,-79.239476
5,Scarborough,Scarborough Village,43.744734,-79.239476
6,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.727929,-79.262029
7,Scarborough,"Golden Mile, Clairlea, Oakridge",43.711112,-79.284577
8,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",43.716316,-79.239476
9,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


Let's check how many unique boroughs are present.

In [4]:
# Distinct values in the 'Borough' column, and the total number of rows
# (the message reports each row as one neighborhood).
borough_count = df_geo['Borough'].unique().size
neighborhood_count = len(df_geo)
print('The dataframe has {} boroughs and {} neighborhoods.'.format(borough_count, neighborhood_count))

The dataframe has 10 boroughs and 103 neighborhoods.


Let's use the `geopy` library to get the latitude and longitude of Toronto.

In [5]:
# Resolve the city name to coordinates; they are used below to centre the maps.
address = 'Toronto, Canada'

geolocator = Nominatim(user_agent='http')
location = geolocator.geocode(address)

# geocode() returns None when the service finds no match. Fail here with a
# clear message instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))

latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto Canada are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto Canada are 43.6534817, -79.3839347.


Let's create a map of Toronto with its neighborhoods.

In [6]:
# Base map centred on the geocoded Toronto coordinates.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# Add one circle marker per row of df_geo, with a "neighbourhood, borough" popup.
for lat, long, borough, neighborhood in zip(df_geo['Latitude'], df_geo['Longitude'], df_geo['Borough'], df_geo['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    # parse_html=True belongs on the Popup: it makes folium treat the label as text.
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, long],
        radius=4,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#87323a',
        # Opacity is a number, not a string (the original passed '0.5').
        fill_opacity=0.5,
        # The former parse_html=False argument was dropped: it is a Popup option,
        # not a CircleMarker path option.
    ).add_to(map_toronto)
map_toronto

Let's simplify the map above: we will segment and cluster only the boroughs whose name contains `Toronto`.

In [7]:
# Boolean mask: True where the borough name contains the substring 'Toronto'.
is_toronto_borough = df_geo['Borough'].str.contains('Toronto')

# Keep the matching rows and renumber them from 0.
toronto_data = df_geo.loc[is_toronto_borough].reset_index(drop=True)
toronto_data

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude
0,East Toronto,The Beaches,43.676357,-79.293031
1,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
2,East Toronto,"India Bazaar, The Beaches West",43.668999,-79.315572
3,East Toronto,Studio District,43.659526,-79.340923
4,Central Toronto,Lawrence Park,43.72802,-79.38879
5,Central Toronto,Davisville North,43.712751,-79.390197
6,Central Toronto,"North Toronto West, Lawrence Park",43.715383,-79.405678
7,Central Toronto,Davisville,43.704324,-79.38879
8,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316
9,Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049


In [8]:
# Report the (rows, columns) shape of the filtered frame.
print(toronto_data.shape)

(39, 4)


Let's mark the neighborhoods of Toronto on the map.

In [10]:
# Fresh base map centred on the geocoded Toronto coordinates
# (this rebinds map_toronto, replacing the earlier map object).
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# Add one circle marker per row of toronto_data, with the neighbourhood name as popup.
for lat, long, label in zip(toronto_data['Latitude'], toronto_data['Longitude'], toronto_data['Neighbourhood']):
    # parse_html=True belongs on the Popup: it makes folium treat the label as text.
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, long],
        radius=4,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#dc31ad',
        fill_opacity=0.7,
        # The former parse_html=False argument was dropped: it is a Popup option,
        # not a CircleMarker path option.
    ).add_to(map_toronto)
map_toronto

Let's integrate with the Foursquare API to explore the neighborhoods and segment them.

In [11]:
# Foursquare credentials are read from the environment so that real keys never
# have to be typed into (and saved with) the notebook. When the variables are
# not set, the original placeholder strings are used.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', 'YOUR_CLIENT_ID')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', 'YOUR_CLIENT_SECRET')

# Request parameters. They are kept as strings because they are only ever
# interpolated into the request URL.
VERSION = '20200721'  # value sent as the `v` query parameter
RADIUS = '500'        # value sent as the `radius` query parameter
LIMIT = '200'         # value sent as the `limit` query parameter

Let's explore the first neighborhood data.

In [12]:
# Look up the 'Neighbourhood' value of the row labelled 0.
toronto_data.loc[0, 'Neighbourhood']

'The Beaches'

Let's get the neighborhood's latitude and longitude values.

In [13]:
# Read the name and coordinates of the row labelled 0 with scalar accessors.
first_row_label = 0
neighborhood_name = toronto_data.at[first_row_label, 'Neighbourhood']
neighborhood_latitude = toronto_data.at[first_row_label, 'Latitude']
neighborhood_longitude = toronto_data.at[first_row_label, 'Longitude']

summary_template = 'The latitude and longitude values of {} are {} and {}'
print(summary_template.format(neighborhood_name, neighborhood_latitude, neighborhood_longitude))


The latitude and longitude values of The Beaches are 43.67635739999999 and -79.2930312


### Now, let's get the top venues around this neighborhood (at most `LIMIT` results within `RADIUS` of its coordinates).
First let's create the GET request URL.

In [14]:
# Browser-style User-Agent header sent with the request.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'}

url_template = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'

# The real request URL, used by the request cell; it embeds the credentials.
url = url_template.format(
    CLIENT_ID, CLIENT_SECRET, VERSION, neighborhood_latitude, neighborhood_longitude, RADIUS, LIMIT)

# Display a copy with the credentials replaced by placeholders, so the client id
# and secret are not written into the saved notebook output.
url_template.format(
    'YOUR_CLIENT_ID', 'YOUR_CLIENT_SECRET', VERSION, neighborhood_latitude, neighborhood_longitude, RADIUS, LIMIT)

'https://api.foursquare.com/v2/venues/explore?&client_id=YOUR_CLIENT_ID&client_secret=YOUR_CLIENT_SECRET&v=20200721&ll=43.67635739999999,-79.2930312&radius=500&limit=200'

Let's send the GET request.

In [18]:
# Send the GET request. The timeout stops the cell from hanging indefinitely if
# the service does not answer.
response = requests.get(url, headers=headers, timeout=30)

# Surface HTTP errors (for example, rejected credentials) here rather than as a
# KeyError when the JSON payload is unpacked later.
response.raise_for_status()

results = response.json()
results

{'meta': {'code': 200, 'requestId': '5f167ac85d15c80dc76f3bc7'},
 'response': {'headerLocation': 'The Beaches',
  'headerFullLocation': 'The Beaches, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 5,
  'suggestedBounds': {'ne': {'lat': 43.680857404499996,
    'lng': -79.28682091449052},
   'sw': {'lat': 43.67185739549999, 'lng': -79.29924148550948}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4bd461bc77b29c74a07d9282',
       'name': 'Glen Manor Ravine',
       'location': {'address': 'Glen Manor',
        'crossStreet': 'Queen St.',
        'lat': 43.67682094413784,
        'lng': -79.29394208780985,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.67682094413784,
          'lng': -79.29394208780985}],
        'distanc

Let's extract the category of the venue.

In [19]:
def get_category_type(row):
    """Return the name of the first category listed for a venue row.

    Parameters
    ----------
    row : mapping-like
        Anything indexable by key (for example a dict or a pandas Series).
        The categories are read from ``row['categories']``; if that key is
        absent, ``row['venue.categories']`` is used instead.

    Returns
    -------
    The ``'name'`` entry of the first category, or ``None`` when the
    category value is missing (None/NaN) or an empty list.

    Raises
    ------
    KeyError
        If the row has neither a ``'categories'`` nor a ``'venue.categories'`` key.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error propagates.
        categories_list = row['venue.categories']

    # A missing value (None, or a float NaN) has no length, so treat it the
    # same way as an empty list.
    if categories_list is None or isinstance(categories_list, float):
        return None
    if len(categories_list) == 0:
        return None
    return categories_list[0]['name']

Let's convert that JSON data into a pandas DataFrame.

In [20]:
# The venue records are the 'items' of the first group in the response.
venues = results['response']['groups'][0]['items']

# Flatten the nested JSON records into columns with dotted names.
# pd.json_normalize replaces the deprecated pandas.io.json.json_normalize.
nearby_venues = pd.json_normalize(venues)

# Keep only the venue name, its categories and its coordinates.
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = nearby_venues.loc[:, filtered_columns]

# Replace each row's category list with the name of its first category.
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# Strip the dotted prefixes, e.g. 'venue.location.lat' -> 'lat'.
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

  nearby_venues = json_normalize(venues)


Unnamed: 0,name,categories,lat,lng
0,Glen Manor Ravine,Trail,43.676821,-79.293942
1,The Big Carrot Natural Food Market,Health Food Store,43.678879,-79.297734
2,Grover Pub and Grub,Pub,43.679181,-79.297215
3,Upper Beaches,Neighborhood,43.680563,-79.292869
4,Seaspray Restaurant,Asian Restaurant,43.678888,-79.298167


In [22]:
# Number of venue rows in the nearby_venues frame.
venue_count = len(nearby_venues)
print("{} venues were returned by foursquare.".format(venue_count))

5 venues were returned by foursquare.
