# Part 1

In [1]:
import pandas as pd
import numpy as np

In [2]:
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
data = pd.read_html(url)[0]

In [3]:
data.rename(columns={'Postcode': 'PostalCode'}, inplace=True)  
ix = data['Borough'] == 'Not assigned'
data.drop(data[ix].index, inplace=True)

In [4]:
data.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor


In [5]:
data['PostalCode'].nunique(), len(data), data['PostalCode'].nunique() == len(data)

(103, 210, False)

In [6]:
post_neighbour_dic = {}
for _, row in data.iterrows():
    if row['PostalCode'] in post_neighbour_dic:
        post_neighbour_dic[row['PostalCode']].append(row['Neighbourhood']) 
    else:
        post_neighbour_dic[row['PostalCode']] = [row['Neighbourhood']] 

In [7]:
post_neighbour_dic['M9B']

['Cloverdale',
 'Islington',
 'Martin Grove',
 'Princess Gardens',
 'West Deane Park']

In [8]:
data['Neighbourhood'] = data['PostalCode'].apply(lambda x: ','.join(post_neighbour_dic[x]))

data.drop_duplicates(inplace=True) 

In [9]:
data.sample(10)

Unnamed: 0,PostalCode,Borough,Neighbourhood
265,M8Y,Etobicoke,"Humber Bay,King's Mill Park,Kingsway Park Sout..."
144,M5N,Central Toronto,Roselawn
127,M3M,North York,Downsview Central
238,M4W,Downtown Toronto,Rosedale
13,M3B,North York,Don Mills North
242,M8W,Etobicoke,"Alderwood,Long Branch"
261,M4Y,Downtown Toronto,Church and Wellesley
33,M5C,Downtown Toronto,St. James Town
197,M5T,Downtown Toronto,"Chinatown,Grange Park,Kensington Market"
239,M5W,Downtown Toronto,Stn A PO Boxes 25 The Esplanade


In [10]:
sum(data['Neighbourhood'] == 'Not assigned')

1

In [11]:
for _, row in data.iterrows():
    if row['Neighbourhood'] == 'Not assigned': row['Neighbourhood'] = row['Borough']

In [12]:
sum(data['Neighbourhood'] == 'Not assigned')

0

In [13]:
data[data['Borough'].str.contains("Queen")]

Unnamed: 0,PostalCode,Borough,Neighbourhood
7,M7A,Queen's Park,Queen's Park
9,M9A,Queen's Park,Queen's Park


In [14]:
data.shape

(103, 3)

# Part 2

In [15]:
!pip install pgeocode
import pgeocode

nomi = pgeocode.Nominatim('ca') 

Collecting pgeocode
  Downloading https://files.pythonhosted.org/packages/45/12/c02be61e117d19a43b3d2b804311eedf49c0158f446d5b0d52f259c4b0fb/pgeocode-0.1.2-py2.py3-none-any.whl
Installing collected packages: pgeocode
Successfully installed pgeocode-0.1.2


In [16]:
data['Latitude'] = data['PostalCode'].apply(lambda x: nomi.query_postal_code(x))['latitude']
data['Longitude'] = data['PostalCode'].apply(lambda x: nomi.query_postal_code(x))['longitude']

In [17]:
data.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
2,M3A,North York,Parkwoods,43.7545,-79.33
3,M4A,North York,Victoria Village,43.7276,-79.3148
4,M5A,Downtown Toronto,Harbourfront,43.6555,-79.3626
5,M6A,North York,"Lawrence Heights,Lawrence Manor",43.7223,-79.4504
7,M7A,Queen's Park,Queen's Park,43.6641,-79.3889


# Part 3

In [18]:
data_toronto = data[data['Borough'].str.contains('Toronto')].reset_index(drop=True)
data_toronto.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M5A,Downtown Toronto,Harbourfront,43.6555,-79.3626
1,M5B,Downtown Toronto,"Ryerson,Garden District",43.6572,-79.3783
2,M5C,Downtown Toronto,St. James Town,43.6513,-79.3756
3,M4E,East Toronto,The Beaches,43.6784,-79.2941
4,M5E,Downtown Toronto,Berczy Park,43.6456,-79.3754


In [19]:
data_toronto['Borough'].unique()

array(['Downtown Toronto', 'East Toronto', 'West Toronto',
       'Central Toronto'], dtype=object)

In [20]:
data_toronto['Neighbourhood'].nunique()

38

In [21]:
import json 
import requests 

from sklearn.cluster import KMeans 

!pip install folium
import folium

Collecting folium
[?25l  Downloading https://files.pythonhosted.org/packages/fd/a0/ccb3094026649cda4acd55bf2c3822bb8c277eb11446d13d384e5be35257/folium-0.10.1-py2.py3-none-any.whl (91kB)
[K     |████████████████████████████████| 92kB 12.6MB/s eta 0:00:01
Collecting branca>=0.3.0 (from folium)
  Downloading https://files.pythonhosted.org/packages/63/36/1c93318e9653f4e414a2e0c3b98fc898b4970e939afeedeee6075dd3b703/branca-0.3.1-py3-none-any.whl
Installing collected packages: branca, folium
Successfully installed branca-0.3.1 folium-0.10.1


In [22]:
latitude, longitude = data_toronto.loc[0, 'Latitude'], data_toronto.loc[0, 'Longitude']
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=12)

for lat, lng, borough, neighborhood in zip(data_toronto['Latitude'], data_toronto['Longitude'], 
                                            data_toronto['Borough'], data_toronto['Neighbourhood']):
    label = f"{neighborhood}, {borough}"
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='black',
        fill=True,
        fill_color='#1E90FF',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [28]:
CLIENT_ID = '...'
CLIENT_SECRET = '...'
VERSION = '20191205'

In [29]:
LIMIT = 100
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        results = requests.get(url, verify=False).json()["response"]
        
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [30]:
requests.packages.urllib3.disable_warnings() 


toronto_venues = getNearbyVenues(names=data_toronto['Neighbourhood'],
                                   latitudes=data_toronto['Latitude'],
                                   longitudes=data_toronto['Longitude'])

Harbourfront
Ryerson,Garden District
St. James Town
The Beaches
Berczy Park
Central Bay Street
Christie
Adelaide,King,Richmond
Dovercourt Village,Dufferin
Harbourfront East,Toronto Islands,Union Station
Little Portugal,Trinity
The Danforth West,Riverdale
Design Exchange,Toronto Dominion Centre
Brockton,Exhibition Place,Parkdale Village
The Beaches West,India Bazaar
Commerce Court,Victoria Hotel
Studio District
Lawrence Park
Roselawn
Davisville North
Forest Hill North,Forest Hill West
High Park,The Junction South
North Toronto West
The Annex,North Midtown,Yorkville
Parkdale,Roncesvalles
Davisville
Harbord,University of Toronto
Runnymede,Swansea
Moore Park,Summerhill East
Chinatown,Grange Park,Kensington Market
Deer Park,Forest Hill SE,Rathnelly,South Hill,Summerhill West
CN Tower,Bathurst Quay,Island airport,Harbourfront West,King and Spadina,Railway Lands,South Niagara
Rosedale
Stn A PO Boxes 25 The Esplanade
Cabbagetown,St. James Town
First Canadian Place,Underground city
Church and W

ValueError: Length mismatch: Expected axis has 0 elements, new values have 7 elements