In [None]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

<a id='item1'></a>

In [None]:
import sys
!{sys.executable} -m pip install msgpack
!{sys.executable} -m pip install beautifulsoup4

import requests
from bs4 import BeautifulSoup

In [None]:
# Download the Wikipedia page that lists the postal codes starting with "M".
url='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
r=requests.get(url, timeout=30)
# Fail fast on an HTTP error instead of parsing an error page.
r.raise_for_status()
# `url` keeps holding the address; the page body is read straight from the response.
soup = BeautifulSoup(r.content,'html.parser')

# First table on the page carrying the "wikitable sortable" class.
table = soup.find("table",{"class":"wikitable sortable"})
if table is None:
    raise ValueError('No "wikitable sortable" table found at ' + url)

In [None]:
# Container that will receive one list of cell texts per scraped table row.
parsed_table_data = list()

# Column labels of the postal-code table, and an empty frame built from them.
column_names = ['Postcode','Borough', 'Neighbourhood']
neighborhoods = pd.DataFrame(columns=column_names)

In [None]:
# Start from an empty list on every run so that re-executing this cell does
# not append the whole table a second time.
parsed_table_data = []

rows = table.find_all('tr')
for row in rows:
    # Direct children of the row only, i.e. its cells.
    children = row.find_all(recursive=False)
    row_text = []
    for child in children:
        # .text is already entity-decoded: the reference marker "&#91;" appears
        # as "[" and the non-breaking space "&#160;" as "\xa0", so the split
        # must use the decoded characters to have any effect.
        clean_text = child.text
        # This is to discard reference/citation links such as "[1]".
        clean_text = clean_text.split('[')[0]
        # This is to clean the header cells of the sort icons: keep the text
        # after the last non-breaking space. Data cells are left alone.
        if child.name == 'th':
            clean_text = clean_text.split('\xa0')[-1]
        clean_text = clean_text.strip()
        row_text.append(clean_text)
    parsed_table_data.append(row_text)

In [None]:
# Build a frame from the scraped rows and take the column labels from its first row.
neighborhoods = pd.DataFrame.from_records(parsed_table_data)
neighborhoods.columns = neighborhoods.iloc[0]

# Keep only data rows: drop the header row itself and rows without a borough.
is_header_row = neighborhoods.Postcode == 'Postcode'
has_no_borough = neighborhoods.Borough == 'Not assigned'
neighborhoods = neighborhoods[~(is_header_row | has_no_borough)].reset_index(drop=True)

# A neighbourhood marked 'Not assigned' takes the name of its borough.
unnamed = neighborhoods['Neighbourhood'] == 'Not assigned'
neighborhoods['Neighbourhood'] = np.where(unnamed, neighborhoods['Borough'], neighborhoods['Neighbourhood'])

In [None]:
# Collapse rows sharing a postcode and borough into a single row whose
# neighbourhood names are joined with commas.
neighbourhoods_by_area = neighborhoods.groupby(['Postcode','Borough'])['Neighbourhood']
neighborhoods = neighbourhoods_by_area.apply(','.join).reset_index()

In [None]:
# Read the geospatial CSV and rename its 'Postal Code' column to 'Postcode'
# so it shares a key name with the neighbourhood table.
url="https://cocl.us/Geospatial_data"
coordinates=pd.read_csv(url).rename(columns={'Postal Code':'Postcode'})

In [None]:
# Repeats the rename done right after the CSV was loaded. Once 'Postal Code'
# has been renamed this is a no-op, since rename() ignores labels that are
# not present by default.
coordinates.rename(columns={'Postal Code':'Postcode'},inplace=True)

In [None]:
# Attach the coordinates to each neighbourhood row, matching on 'Postcode'
# (default inner join).
results=pd.merge(neighborhoods, coordinates, on='Postcode')

In [None]:
# Keep the Downtown Toronto rows. The mask is computed on `results` itself:
# a mask taken from `neighborhoods` is only correct while both frames happen
# to share exactly the same row index.
dt_toronto_data = results[results['Borough'] == 'Downtown Toronto'].reset_index(drop=True)

In [None]:
address = 'Toronto'

# Nominatim requires an identifying user agent; recent geopy releases refuse
# to create the geocoder without one.
geolocator = Nominatim(user_agent='toronto_neighbourhood_explorer')
location = geolocator.geocode(address)
# geocode() returns None when the address cannot be resolved.
if location is None:
    raise ValueError('Could not geocode address: {}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Downtown Toronto are {}, {}.'.format(latitude, longitude))

In [None]:
# Base map centred on the geocoded latitude/longitude.
map_dt_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# One circle marker per Downtown Toronto row, with a "neighbourhood, borough" popup.
marker_fields = [dt_toronto_data[column] for column in ('Latitude', 'Longitude', 'Borough', 'Neighbourhood')]
for lat, lng, borough, neighborhood in zip(*marker_fields):
    label = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_dt_toronto)

map_dt_toronto

In [None]:
import os

# Foursquare credentials are read from the environment so that no secret is
# stored in, or printed by, the notebook.
# NOTE(review): keys that were previously committed in this cell should be
# regenerated in the Foursquare developer console.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

if not CLIENT_ID or not CLIENT_SECRET:
    # Say so here rather than letting the first API call fail obscurely.
    print('Foursquare credentials are missing: set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET.')
else:
    print('Your credentails:')
    print('CLIENT_ID: ' + CLIENT_ID)
    # The secret is deliberately masked in the cell output.
    print('CLIENT_SECRET:' + '*' * 8)

In [None]:
# Name and coordinates of the first Downtown Toronto row, used as the sample
# location for a single API request. (The former leading bare
# `dt_toronto_data.loc[0, 'Neighbourhood']` statement was removed: its value
# was discarded and never displayed.)
neighbourhood_latitude = dt_toronto_data.loc[0, 'Latitude'] # neighborhood latitude value
neighbourhood_longitude = dt_toronto_data.loc[0, 'Longitude'] # neighborhood longitude value

neighbourhood_name = dt_toronto_data.loc[0, 'Neighbourhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighbourhood_name, 
                                                               neighbourhood_latitude, 
                                                               neighbourhood_longitude))

In [None]:
LIMIT = 50 # limit of number of venues returned by Foursquare API
radius = 500 # define radius
# create URL for the venues/explore endpoint around the sample location
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighbourhood_latitude, 
    neighbourhood_longitude, 
    radius, 
    LIMIT)
# Display the URL with the secret masked, so the credential is not written
# into the saved cell output. `url` itself still holds the real value.
url.replace(CLIENT_SECRET, '<CLIENT_SECRET>') if CLIENT_SECRET else url

In [None]:
# Send the explore request for the sample location and decode the JSON body.
explore_response = requests.get(url)
venue_results = explore_response.json()

In [None]:
# function that extracts the category of the venue
def get_category_type(row):
    """Return the name of a venue's first category, or None if it has none.

    Parameters
    ----------
    row : mapping or pandas.Series
        A venue record holding its category list under 'categories' or,
        failing that, under 'venue.categories'.

    Returns
    -------
    str or None
        The 'name' of the first category, or None when the category value
        is empty or is not a list.
    """
    # Only a missing key triggers the fallback lookup; any other error
    # (including KeyboardInterrupt) is allowed to propagate.
    try:
        categories_list = row['categories']
    except KeyError:
        categories_list = row['venue.categories']

    # A missing value (e.g. NaN) or an empty list both mean "no category".
    if not isinstance(categories_list, (list, tuple)) or len(categories_list) == 0:
        return None
    return categories_list[0]['name']

In [None]:
# Venue records of the first group in the explore response.
venues = venue_results['response']['groups'][0]['items']

# Flatten the nested JSON and keep only name, categories and coordinates.
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = json_normalize(venues).loc[:, filtered_columns]

# Replace each category list with the name of its first category.
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# Keep only the last dotted component of every column label
# (e.g. 'venue.location.lat' becomes 'lat').
nearby_venues.columns = [label.rsplit(".", 1)[-1] for label in nearby_venues.columns]

nearby_venues.head()

In [None]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query Foursquare for venues around each location and return one table.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Label and coordinates of every location to explore; they are
        consumed in parallel.
    radius : int, optional
        Search radius passed to the API (default 500).

    Returns
    -------
    pandas.DataFrame
        One row per venue: the location label (column 'Postcode'), the
        location coordinates, and the venue name, coordinates and first
        category name (None when the venue has no category). When no venue
        is found the frame is empty but still has all seven columns.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.

    Notes
    -----
    Reads the module-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT.
    """
    column_labels = ['Postcode', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)

        # make the GET request; surface HTTP errors instead of failing later
        # on a missing key in the error payload
        response = requests.get(url)
        response.raise_for_status()
        venue_results = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for v in venue_results:
            venue = v['venue']
            categories = venue['categories']
            venue_rows.append((
                name, 
                lat, 
                lng, 
                venue['name'], 
                venue['location']['lat'], 
                venue['location']['lng'],  
                # a venue may come without any category
                categories[0]['name'] if categories else None))

    # Passing the labels to the constructor keeps the columns even when no
    # venue was collected (assigning .columns on an empty frame would raise).
    nearby_venues = pd.DataFrame(venue_rows, columns=column_labels)

    return(nearby_venues)

In [None]:
# Collect the venues around every Downtown Toronto postcode.
dt_toronto_venues = getNearbyVenues(
    dt_toronto_data['Postcode'],
    dt_toronto_data['Latitude'],
    dt_toronto_data['Longitude'],
)

In [None]:
# One indicator column per venue category, labelled with the bare category name.
dt_toronto_onehot = pd.get_dummies(dt_toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# Put the postcode of each venue back next to its indicators.
dt_toronto_onehot['Postcode'] = dt_toronto_venues['Postcode']

# Rotate the last column to the front and reorder the frame accordingly.
fixed_columns = list(dt_toronto_onehot.columns)
fixed_columns.insert(0, fixed_columns.pop())
dt_toronto_onehot = dt_toronto_onehot[fixed_columns]

dt_toronto_onehot.head()

In [None]:
# Average every indicator column per postcode, i.e. the share of that
# postcode's venues falling in each category.
venues_by_postcode = dt_toronto_onehot.groupby('Postcode')
dt_toronto_grouped = venues_by_postcode.mean().reset_index()
dt_toronto_grouped

In [None]:
num_top_venues = 5

# Print the most frequent venue categories of every postcode.
for hood in dt_toronto_grouped['Postcode']:
    print("----"+hood+"----")
    # Transpose the postcode's row into (venue, freq) pairs.
    venue_freq = dt_toronto_grouped[dt_toronto_grouped['Postcode'] == hood].T.reset_index()
    venue_freq.columns = ['venue','freq']
    # The first pair holds the postcode itself, not a category.
    venue_freq = venue_freq.iloc[1:].copy()
    venue_freq['freq'] = venue_freq['freq'].astype(float)
    ranked = venue_freq.round({'freq': 2}).sort_values('freq', ascending=False).reset_index(drop=True)
    print(ranked.head(num_top_venues))
    print('\n')

In [None]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the row's largest values, highest first.

    The first entry of ``row`` is skipped. The remaining entries are sorted
    in descending order and the index labels of the first
    ``num_top_venues`` of them are returned as an array.
    """
    ranked_categories = row.iloc[1:].sort_values(ascending=False)
    return ranked_categories.index.values[:num_top_venues]

In [None]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Postcode']
for ind in np.arange(num_top_venues):
    # Ranks 1-3 take their own suffix, every later rank uses 'th'. An explicit
    # bounds check replaces the former bare `except:` used as control flow.
    if ind < len(indicators):
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    else:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe with one row per postcode
postcode_venues_sorted = pd.DataFrame(columns=columns)
postcode_venues_sorted['Postcode'] = dt_toronto_grouped['Postcode']

# fill every row with that postcode's top categories, most frequent first
for ind in np.arange(dt_toronto_grouped.shape[0]):
    postcode_venues_sorted.iloc[ind, 1:] = return_most_common_venues(dt_toronto_grouped.iloc[ind, :], num_top_venues)

postcode_venues_sorted

In [None]:
# set number of clusters
kclusters = 5

# Feature matrix: every category frequency column, without the postcode key.
# The axis is named explicitly; passing it positionally (`drop('Postcode', 1)`)
# is rejected by newer pandas releases.
dt_toronto_grouped_clustering = dt_toronto_grouped.drop(columns='Postcode')

# run k-means clustering (fixed random_state for repeatable labels)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(dt_toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

In [None]:
# The k-means labels follow the row order of dt_toronto_grouped, so they are
# keyed by that frame's postcodes rather than attached by position to
# dt_toronto_data (whose rows may differ in number or order).
cluster_labels = pd.DataFrame({
    'Postcode': dt_toronto_grouped['Postcode'].values,
    'Cluster Labels': kmeans.labels_,
})

# Work on a new frame so dt_toronto_data is not mutated and the cell can be
# re-run; a label column left over from an earlier run is discarded first.
dt_toronto_merged = dt_toronto_data.drop(columns='Cluster Labels', errors='ignore')

# add clustering labels (postcodes that returned no venue have no label and are dropped)
dt_toronto_merged = dt_toronto_merged.merge(cluster_labels, on='Postcode', how='inner')

# add the most common venue columns for each postcode
dt_toronto_merged = dt_toronto_merged.join(postcode_venues_sorted.set_index('Postcode'), on='Postcode')

dt_toronto_merged.head() # check the last columns!

In [None]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow colour per
# cluster, as hex strings (the unused `x`/`ys` helpers only supplied the count)
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(dt_toronto_merged['Latitude'], dt_toronto_merged['Longitude'], dt_toronto_merged['Postcode'], dt_toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # Labels run from 0 to kclusters-1, so they index the palette directly
    # instead of relying on `cluster-1` wrapping around for cluster 0.
    cluster_color = rainbow[int(cluster)]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [73]:
# Rows assigned to cluster 0, showing column 1 plus every column from
# position 5 onwards.
in_cluster = dt_toronto_merged['Cluster Labels'] == 0
shown_columns = dt_toronto_merged.columns[[1] + list(range(5, dt_toronto_merged.shape[1]))]
dt_toronto_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
17,Downtown Toronto,0,Grocery Store,Café,Park,Diner,Nightclub,Convenience Store,Baby Store,Athletics & Sports,Italian Restaurant,Restaurant


In [75]:
# Rows assigned to cluster 1, showing column 1 plus every column from
# position 5 onwards.
in_cluster = dt_toronto_merged['Cluster Labels'] == 1
shown_columns = dt_toronto_merged.columns[[1] + list(range(5, dt_toronto_merged.shape[1]))]
dt_toronto_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Downtown Toronto,1,Coffee Shop,Restaurant,Bakery,Indian Restaurant,Café,Italian Restaurant,Pub,Pizza Place,Gourmet Shop,Breakfast Spot
2,Downtown Toronto,1,Burger Joint,Gay Bar,Japanese Restaurant,Coffee Shop,Nightclub,Men's Store,Restaurant,Gastropub,Pub,Bookstore
5,Downtown Toronto,1,Coffee Shop,Gastropub,Hotel,Restaurant,Middle Eastern Restaurant,Cocktail Bar,Farmers Market,Italian Restaurant,Japanese Restaurant,Café
6,Downtown Toronto,1,Coffee Shop,Cocktail Bar,Farmers Market,Cheese Shop,Pub,Restaurant,Seafood Restaurant,Bakery,Steakhouse,Café
12,Downtown Toronto,1,Café,Bookstore,Japanese Restaurant,Bar,Restaurant,Bakery,Coffee Shop,College Gym,Comfort Food Restaurant,Italian Restaurant
13,Downtown Toronto,1,Café,Vegetarian / Vegan Restaurant,Mexican Restaurant,Vietnamese Restaurant,Dumpling Restaurant,Bakery,Chinese Restaurant,Comfort Food Restaurant,Caribbean Restaurant,Dessert Shop
15,Downtown Toronto,1,Café,Cocktail Bar,Restaurant,Pub,Seafood Restaurant,Hotel,Farmers Market,Creperie,Bakery,Jazz Club


In [76]:
# Rows assigned to cluster 2, showing column 1 plus every column from
# position 5 onwards.
in_cluster = dt_toronto_merged['Cluster Labels'] == 2
shown_columns = dt_toronto_merged.columns[[1] + list(range(5, dt_toronto_merged.shape[1]))]
dt_toronto_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,2,Park,Playground,Trail,Yoga Studio,Concert Hall,Design Studio,Department Store,Deli / Bodega,Dance Studio,Creperie


In [77]:
# Rows assigned to cluster 3, showing column 1 plus every column from
# position 5 onwards.
in_cluster = dt_toronto_merged['Cluster Labels'] == 3
shown_columns = dt_toronto_merged.columns[[1] + list(range(5, dt_toronto_merged.shape[1]))]
dt_toronto_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,Downtown Toronto,3,Coffee Shop,Bakery,Pub,Park,Breakfast Spot,Mexican Restaurant,Theater,Café,Bank,Electronics Store
4,Downtown Toronto,3,Clothing Store,Café,Coffee Shop,Restaurant,Ramen Restaurant,Theater,American Restaurant,Sandwich Place,Japanese Restaurant,Hotel
7,Downtown Toronto,3,Coffee Shop,Café,Italian Restaurant,Bubble Tea Shop,Chinese Restaurant,Falafel Restaurant,Spa,Ice Cream Shop,Thai Restaurant,Sandwich Place
8,Downtown Toronto,3,Coffee Shop,American Restaurant,Café,Steakhouse,Breakfast Spot,Restaurant,Hotel,Asian Restaurant,Bar,Gastropub
9,Downtown Toronto,3,Coffee Shop,Aquarium,Park,Bar,Hotel,Pizza Place,Brewery,Café,Neighborhood,Indian Restaurant
10,Downtown Toronto,3,Coffee Shop,Café,Restaurant,Gastropub,Deli / Bodega,Steakhouse,Hotel,Bar,Hotel Bar,Sandwich Place
11,Downtown Toronto,3,Coffee Shop,Café,Hotel,Deli / Bodega,Restaurant,Gastropub,American Restaurant,Gym,Pub,Bookstore
16,Downtown Toronto,3,Coffee Shop,Café,Deli / Bodega,Hotel,American Restaurant,Restaurant,Bar,Gastropub,Concert Hall,Steakhouse


In [78]:
# Rows assigned to cluster 4, showing column 1 plus every column from
# position 5 onwards.
in_cluster = dt_toronto_merged['Cluster Labels'] == 4
shown_columns = dt_toronto_merged.columns[[1] + list(range(5, dt_toronto_merged.shape[1]))]
dt_toronto_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
14,Downtown Toronto,4,Airport Lounge,Airport Service,Airport Terminal,Sculpture Garden,Airport,Airport Food Court,Airport Gate,Boat or Ferry,Harbor / Marina,Boutique
