# Part 1

In [1]:
import pandas as pd

In [99]:
# Wikipedia page listing the Canadian postal codes that start with "M".
POSTAL_CODES_URL = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# read_html returns one DataFrame per HTML table on the page; the first one is used.
page_tables = pd.read_html(POSTAL_CODES_URL)
df = page_tables[0]
df.head()

Unnamed: 0,Postal code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront


In [101]:
# Keep only the postal codes that have been assigned to a borough.
has_borough = df['Borough'] != 'Not assigned'

# reset_index() without drop=True keeps the previous row labels as an extra
# 'index' column next to the new 0..n-1 index.
df = df[has_borough].reset_index()
df.head()

Unnamed: 0,index,Postal code,Borough,Neighborhood
0,2,M3A,North York,Parkwoods
1,3,M4A,North York,Victoria Village
2,4,M5A,Downtown Toronto,Regent Park / Harbourfront
3,5,M6A,North York,Lawrence Manor / Lawrence Heights
4,6,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government


In [102]:
# Neighbourhoods that share a postal code are separated by ' / ' in the table;
# turn that into a comma-separated list.
# The vectorised .str accessor leaves a missing (NaN) neighbourhood untouched,
# whereas calling s.replace on a float NaN raises AttributeError.
# regex=False keeps this a literal substring replacement.
df['Neighborhood'] = df['Neighborhood'].str.replace(' / ', ', ', regex=False)
df.head()

Unnamed: 0,index,Postal code,Borough,Neighborhood
0,2,M3A,North York,Parkwoods
1,3,M4A,North York,Victoria Village
2,4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,5,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [103]:
# Show (rows, columns) of the cleaned table.
df.shape

(103, 4)

# Part 2

In [6]:
!conda install -c conda-forge geocoder --yes

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - geocoder


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    certifi-2020.4.5.1         |   py36h9f0ad1d_0         151 KB  conda-forge
    ca-certificates-2020.4.5.1 |       hecc5488_0         146 KB  conda-forge
    geocoder-1.38.1            |             py_1          53 KB  conda-forge
    ratelim-0.1.6              |             py_2           6 KB  conda-forge
    openssl-1.1.1g             |       h516909a_0         2.1 MB  conda-forge
    python_abi-3.6             |          1_cp36m           4 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         2.5 MB

The following NEW packages will be INSTALLED:

    geocoder:        1.38.1-py_1       conda-forge
    python_abi:    

In [105]:
import geocoder
import math

def get_lat_long(postal_code):
    """Look up the coordinates of a Toronto postal code with the ArcGIS geocoder.

    Parameters
    ----------
    postal_code : str
        Postal code to look up, e.g. 'M5A'. It is combined with
        'Toronto, Ontario' to form the query.

    Returns
    -------
    tuple
        ``(latitude, longitude)``. If the geocoder returns no coordinates the
        result is ``(nan, nan)`` so the caller still gets a two-element tuple
        and can build a two-column frame from it.
    """
    latlng = geocoder.arcgis('{}, Toronto, Ontario'.format(postal_code)).latlng
    # latlng is None/empty when nothing was found; tuple(None) would raise TypeError.
    if not latlng:
        return (math.nan, math.nan)
    return tuple(latlng)

In [106]:
# Geocode every postal code, then spread the (latitude, longitude) tuples
# over two named columns.
coordinate_pairs = df['Postal code'].apply(get_lat_long)
lats_and_longs = pd.DataFrame(list(coordinate_pairs), columns=['Latitude', 'Longitude'])

In [109]:
# Attach the geocoded coordinates to the postal code table, one column at a time.
for coordinate_column in ['Latitude', 'Longitude']:
    df[coordinate_column] = lats_and_longs[coordinate_column]
df.head()

Unnamed: 0,index,Postal code,Borough,Neighborhood,Latitude,Longitude
0,2,M3A,North York,Parkwoods,43.752935,-79.335641
1,3,M4A,North York,Victoria Village,43.728102,-79.31189
2,4,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.650964,-79.353041
3,5,M6A,North York,"Lawrence Manor, Lawrence Heights",43.723265,-79.451211
4,6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.66179,-79.38939


In [110]:
# Show the last rows as well, to check the coordinates at the end of the table.
df.tail()

Unnamed: 0,index,Postal code,Borough,Neighborhood,Latitude,Longitude
98,160,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.65334,-79.509766
99,165,M4Y,Downtown Toronto,Church and Wellesley,43.666659,-79.381472
100,168,M7Y,East Toronto,Business reply mail Processing CentrE,43.6487,-79.38545
101,169,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.632798,-79.493017
102,178,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,...",43.62549,-79.526


# Part 3

In [70]:
!conda install -c conda-forge folium=0.5.0 --yes

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    altair-4.1.0               |             py_1         614 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    branca-0.4.0               |             py_0          26 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         713 KB

The following NEW packages will be INSTALLED:

    altair:  4.1.0-py_1 conda-forge
    branca:  0.4.0-py_0 conda-forge
    folium:  0.5.0-py_0 conda-forge
    vincent: 0.4.4-py_1 conda-forge


Downloading and Extracting Packages
vincent-0.4.4        | 28 KB     | #####

In [82]:
from geopy.geocoders import Nominatim
import folium

**Get the coordinates of Toronto:**

In [80]:
address = 'Toronto, Ontario'
# NOTE(review): this Nominatim geolocator is created but not used below;
# the lookup itself goes through the ArcGIS geocoder.
geolocator = Nominatim(user_agent="toronto_explorer")
location = geocoder.arcgis(address).latlng
# latlng is None when the lookup fails; fail with a readable message instead of
# "'NoneType' object is not subscriptable".
if not location:
    raise ValueError('Could not geocode {!r}'.format(address))
latitude = location[0]
longitude = location[1]

**Show the postal codes on a map:**

In [116]:
# Create a map centred on the Toronto coordinates
my_map = folium.Map(location=[latitude, longitude], zoom_start=11)

# Add one circle marker per postal code, labelled "<neighborhood(s)>, <borough>"
for lat, lng, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    # parse_html is a Popup argument, not a marker style option, so it is
    # only passed to the Popup above.
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(my_map)

my_map

**Get venues close to the postal codes:**

In [117]:
# Foursquare credentials
# Read from the environment so real keys never have to be saved in the notebook.
# The '???' placeholder is kept as the fallback when a variable is not set.
import os

CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '???')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '???')
VERSION = '20180605'  # sent as the `v` query parameter
LIMIT = 100  # sent as the `limit` query parameter

In [127]:
import requests

In [128]:
def get_nearby_venues(names, latitudes, longitudes, radius=500, timeout=10):
    """Query the Foursquare "explore" endpoint around each location.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Parallel sequences: a label and the coordinates of each location.
        They are consumed together with ``zip``.
    radius : int, optional
        Value sent as the ``radius`` query parameter (default 500).
    timeout : float, optional
        Seconds to wait for each HTTP response before ``requests`` gives up
        (default 10).

    Returns
    -------
    pandas.DataFrame
        One row per venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        The frame has these columns even when no venue was returned.
        'Venue Category' is None for a venue that lists no category.

    Notes
    -----
    Uses the notebook-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT.
    A response without the expected 'response' / 'groups' / 'items' structure
    contributes no rows. Each location name is printed as it is processed.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # let requests build and URL-encode the query string
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }

        # make the GET request
        payload = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params=params,
            timeout=timeout).json()

        # tolerate a missing 'response', or a missing or empty 'groups' list
        groups = payload.get('response', {}).get('groups') or [{}]
        items = groups[0].get('items', [])

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            categories = venue.get('categories') or []
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # passing columns here (instead of assigning them afterwards) also works
    # when rows is empty
    return pd.DataFrame(rows, columns=columns)

In [129]:
# Fetch the venues around every postal code, using the function's default radius.
venues = get_nearby_venues(
    names=df['Neighborhood'],
    latitudes=df['Latitude'],
    longitudes=df['Longitude'],
)

Parkwoods
Victoria Village
Regent Park, Harbourfront
Lawrence Manor, Lawrence Heights
Queen's Park, Ontario Provincial Government
Islington Avenue
Malvern, Rouge
Don Mills
Parkview Hill, Woodbine Gardens
Garden District, Ryerson
Glencairn
West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale
Rouge Hill, Port Union, Highland Creek
Don Mills
Woodbine Heights
St. James Town
Humewood-Cedarvale
Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood
Guildwood, Morningside, West Hill
The Beaches
Berczy Park
Caledonia-Fairbanks
Woburn
Leaside
Central Bay Street
Christie
Cedarbrae
Hillcrest Village
Bathurst Manor, Wilson Heights, Downsview North
Thorncliffe Park
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Scarborough Village
Fairview, Henry Farm, Oriole
Northwood Park, York University
East Toronto
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
Kennedy Park, Ionview, East Birchmount Park
Bayview Village
Downsview
The Danforth West, Ri

**Show venues on a map:**

In [134]:
# Create a map centred on the Toronto coordinates
my_map = folium.Map(location=[latitude, longitude], zoom_start=11)

# Add one circle marker per venue, labelled "<venue>, <category>, <neighborhood(s)>"
for lat, lng, venue, category, neighborhood in zip(venues['Venue Latitude'], venues['Venue Longitude'], venues['Venue'], venues['Venue Category'], venues['Neighborhood']):
    label = '{}, {}, {}'.format(venue, category, neighborhood)
    label = folium.Popup(label, parse_html=True)
    # parse_html is a Popup argument, not a marker style option, so it is
    # only passed to the Popup above.
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(my_map)

my_map

**Cluster the venues based on their coordinates:**

In [148]:
from sklearn.cluster import KMeans
import numpy as np
import matplotlib.cm as cm
import matplotlib.colors as colors

In [145]:
# number of clusters to split the venues into
kclusters = 5

# cluster on the venue positions only
coordinates = venues[['Venue Latitude', 'Venue Longitude']]

# run k-means clustering
# random_state fixes the centroid initialisation. n_init is set explicitly because
# newer scikit-learn releases changed its default from 10 to 'auto', which would
# otherwise change the cluster assignments between environments.
kmeans = KMeans(n_clusters=kclusters, n_init=10, random_state=0).fit(coordinates)

In [149]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one hex colour per cluster, evenly spaced
# along matplotlib's rainbow colormap
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(rgba) for rgba in colors_array]

# add one marker per venue, coloured by the cluster k-means assigned to it
for lat, lon, poi, cluster in zip(venues['Venue Latitude'], venues['Venue Longitude'], venues['Neighborhood'], kmeans.labels_):
    # labels_ holds integer cluster ids; convert to a plain int for indexing and display
    cluster = int(cluster)
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster],
        fill=True,
        fill_color=rainbow[cluster],
        fill_opacity=0.7).add_to(map_clusters)

map_clusters