In [21]:
import pandas as pd
import numpy as np
import requests
import random
import folium

from bs4 import BeautifulSoup

from geopy.geocoders import Nominatim
from IPython.display import Image
from IPython.core.display import HTML
from pandas.io.json import json_normalize

import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans

print('All libraries are imported')

All libraries are imported


## 1. Get and Process the data

In [23]:
df_geo = pd.read_csv('http://cocl.us/Geospatial_data')
df_geo

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


In [24]:
result = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')
soup = BeautifulSoup(result.content, 'lxml')
table = soup.find_all('table')[0]
df_neighborhood = pd.read_html(str(table))[0]
df_neighborhood = df_neighborhood.drop(df_neighborhood[df_neighborhood.Borough=='Not assigned'].index)

In [25]:
df_neighborhood

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
160,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
165,M4Y,Downtown Toronto,Church and Wellesley
168,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
169,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [26]:
df_toronto = pd.merge( df_neighborhood, df_geo, on='Postal Code')
df_toronto

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509


## 2. Create a map of Toronto with neighborhoods superimposed on top.

Use geopy library to get the latitude and longitude values of Toronto.

In [27]:
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [28]:
map_toronto = folium.Map([latitude, longitude], zoom_start=10)

for lat, lng, borough, neighborhood in zip(df_toronto['Latitude'], 
                                            df_toronto['Longitude'], 
                                            df_toronto['Borough'], 
                                            df_toronto['Neighbourhood']):
    label = '{},{}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        label=label,
        color='blue',
        fill=True,
        fill_color='blue',
        fill_opacity=0.6,
        parse_html=False
    ).add_to(map_toronto)

map_toronto

## 3. Explore and cluster the neighborhoods in Toronto

### 3.1 We are going to work with North York

In [42]:
northyork_data = df_toronto[df_toronto['Borough']=='North York'].reset_index(drop=True)
northyork_data

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
3,M3B,North York,Don Mills,43.745906,-79.352188
4,M6B,North York,Glencairn,43.709577,-79.445073
5,M3C,North York,Don Mills,43.7259,-79.340923
6,M2H,North York,Hillcrest Village,43.803762,-79.363452
7,M3H,North York,"Bathurst Manor, Wilson Heights, Downsview North",43.754328,-79.442259
8,M2J,North York,"Fairview, Henry Farm, Oriole",43.778517,-79.346556
9,M3J,North York,"Northwood Park, York University",43.76798,-79.487262


### 3.2 get geographical coordinates of North York

In [30]:
map_northyork = folium.Map([latitude, longitude], zoom_start=10)

for lat, lng, borough, neighborhood in zip(northyork_data['Latitude'], 
                                            northyork_data['Longitude'], 
                                            northyork_data['Borough'], 
                                            northyork_data['Neighbourhood']):
    label = '{},{}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        label=label,
        color='blue',
        fill=True,
        fill_color='blue',
        fill_opacity=0.6,
        parse_html=False
    ).add_to(map_northyork)

map_northyork

### 3.3 Define foursquare Crediential and Version

In [31]:
CLIENT_ID = '13TMJMMA0X1YCDRQR5YJJSIWBE4GVYUHQQWWUFUNTYIESLBJ'
CLIENT_SECRER = 'ZEWRXSXJPJQCYR2GE4LCVDBSCDMEP0IVIMV3GP3YBTM2YJYK'
VERSION = '20201115'
LIMIT = 100

### 3.4 Explore the first neighborhood in the data frame

Get the neighborhood's name, latitude and longitude values.

In [44]:
neighborhood_name = northyork_data.loc[0, 'Neighbourhood']
neighborhood_latitude = northyork_data.loc[0, 'Latitude']
neighborhood_longitude = northyork_data.loc[0, 'Longitude']
print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Parkwoods are 43.7532586, -79.3296565.


Now, let's get the top 100 venues that are in Parkwoods within a radius of 500 meters.

In [47]:
radius = 500
url = 'http://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude,
    neighborhood_longitude,
    radius,
    LIMIT
)
print(url)

http://api.foursquare.com/v2/venues/explore?client_id=13TMJMMA0X1YCDRQR5YJJSIWBE4GVYUHQQWWUFUNTYIESLBJ&client_secret=ZEWRXSXJPJQCYR2GE4LCVDBSCDMEP0IVIMV3GP3YBTM2YJYK&v=20201115&ll=43.7532586,-79.3296565&limit=500


Send the GET request and examine the resutls

In [48]:
result = requests.get(url).json()
result

{'meta': {'code': 200, 'requestId': '5fb2a670a5421e17ccbca4d0'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'suggestedRadius': 2964,
  'headerLocation': 'Toronto',
  'headerFullLocation': 'Toronto',
  'headerLocationGranularity': 'city',
  'totalResults': 186,
  'suggestedBounds': {'ne': {'lat': 43.7803021191988,
    'lng': -79.29114776713618},
   'sw': {'lat': 43.73117913718101, 'lng': -79.36406371243268}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4b8991cbf964a520814232e3',
       'name': "Allwyn's Bakery",
       'location': {'address': '81 Underhill drive',
        'lat': 43.75984035203157,
        'lng': -79.32471879917513,
        'labeledLatLngs': [{'label': 'display',
          'la

Now we are ready to clean the json and structure it into a pandas dataframe.

In [51]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [54]:
venues = result['response']['groups'][0]['items']
nearby_venues = json_normalize(venues)

filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

nearby_venues.columns = [col.split('.')[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Allwyn's Bakery,Caribbean Restaurant,43.75984,-79.324719
1,Donalda Golf & Country Club,Golf Course,43.752816,-79.342741
2,Galleria Supermarket,Supermarket,43.75352,-79.349518
3,Graydon Hall Manor,Event Space,43.763923,-79.342961
4,Darband Restaurant,Middle Eastern Restaurant,43.755194,-79.348498


In [55]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

100 venues were returned by Foursquare.


### 3.5 Explore all neighborhood in North York