# Part-1 Data Cleaning 

## Importing Libraries

In [2]:
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import numpy as np
import matplotlib.pyplot as plt
from geopy.geocoders import Nominatim
import requests
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium
import json
from bs4 import BeautifulSoup
import html5lib
import geocoder

In [None]:
!wget -q -O 'toronto_data.csv' https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M
print('Data downloaded!')

## Reading Table Data from Wikipedia

Data Cleaning Remarks:
- The value from the wikipedia is already been aggregated, for example, a `Postal Code` is no longer listed twice for different neighborhood. 
- The `Not assigned` value from `Borough` column will be dropped amnd stored as `toronto data`
- The neighborhood has no `Not assigned value`

In [14]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

response = requests.get(url)
soup = BeautifulSoup(response.text,'html.parser')

table = soup.find('table',{'class':'wikitable sortable'})
toronto_data = pd.read_html(str(table))
toronto_data = toronto_data[0]
toronto_data = toronto_data[toronto_data.Borough != 'Not assigned']
toronto_data.reset_index()[toronto_data.columns]

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


In [4]:
toronto_data.shape

(103, 3)

In [15]:
toronto_data[toronto_data["Neighborhood"] == "Not assigned"]

Unnamed: 0,Postal Code,Borough,Neighborhood


To prove the `Not assigned` doesn't exist on the `Neighboorhood`, following code is used.

In [12]:
toronto_data.where(toronto_data['Neighborhood'] == 'Not assigned').isna().count()

Postal Code     103
Borough         103
Neighborhood    103
dtype: int64

# Part-2 Adding Latitude and Longitude

In [6]:
postal_code = toronto_data['Postal Code']
lat_lng_coords = None

while(lat_lng_coords is None):
  g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))
  lat_lng_coords = g.latlng

latitude = lat_lng_coords[0]
longitude = lat_lng_coords[1]

KeyboardInterrupt: 

In [None]:
lat_lng_coords

In [20]:
locations = pd.read_csv("https://cocl.us/Geospatial_data")
locations

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


In [49]:
lat_lng_coords = toronto_data.join(locations.set_index(["Postal Code"]), on = "Postal Code")
lat_lng_coords = lat_lng_coords.reset_index()[lat_lng_coords.columns]
lat_lng_coords

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


# Part-3 Mapping and Clustering

Retrieving Toronto Coordinates

In [50]:
address = 'Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


Creating superimposed markers on top of the map

In [51]:
map_Toronto = folium.Map(location=[latitude, longitude], zoom_start=10.5)

# add markers to map
for lat, lng, label in zip(lat_lng_coords['Latitude'], lat_lng_coords['Longitude'], lat_lng_coords['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=7.5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Toronto)  
    
map_Toronto

In [53]:
CLIENT_ID = 'W2A2DWDTAEPECPBIFJH5EBG2DXRU0GU3KF5NVDRBFUNVOYVK' # your Foursquare ID
CLIENT_SECRET = '4HSJKLWT4SBVHQTSFSXGFIAKGQKUZNRABUXO01QUR5LKQCVB' # your Foursquare Secret
VERSION = '20180604'

LIMIT = 100
radius = 500

url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}&time=any&day=any'.format(
    CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, radius, LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?client_id=W2A2DWDTAEPECPBIFJH5EBG2DXRU0GU3KF5NVDRBFUNVOYVK&client_secret=4HSJKLWT4SBVHQTSFSXGFIAKGQKUZNRABUXO01QUR5LKQCVB&ll=43.6534817,-79.3839347&v=20180604&radius=500&limit=100&time=any&day=any'

In [56]:
toronto_boroughs = lat_lng_coords[lat_lng_coords['Borough'].str.contains('Toronto')]
toronto_boroughs

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
19,M4E,East Toronto,The Beaches,43.676357,-79.293031
20,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
24,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
25,M6G,Downtown Toronto,Christie,43.669542,-79.422564
30,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568
31,M6H,West Toronto,"Dufferin, Dovercourt Village",43.669005,-79.442259
