## 1. Import Libraries

In [4]:
import json # library to handle JSON files
import os # read API credentials from environment variables

import numpy as np # library to handle data in a vectorized manner
import pandas as pd # library for data analsysis
import requests # library to handle requests
# NOTE(review): pandas >= 1.0 exposes this as `pandas.json_normalize`, and the
# `pandas.io.json` path is believed to be gone in pandas 2.x - confirm against the installed version.
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


## 2. Import data from pickled file

In [5]:
# Load the pickled DataFrame from the working directory. Later cells read its
# 'Borough', 'Neighbourhood', 'Latitude' and 'Longitude' columns.
# NOTE(review): unpickling can execute code stored in the file - only load
# pickles that come from a trusted source.
PICKLE_PATH = 'toronto_ll.pkl'
df_post = pd.read_pickle(PICKLE_PATH)
print('DataFrame imported.')

DataFrame imported.


## 3. Analyze data of Toronto

#### Get the coordinates of Toronto, using openstreetmap

In [59]:
# Geocode the city with Nominatim (OpenStreetMap). The resulting latitude and
# longitude are used to centre the city-wide folium maps.
address = 'Toronto, Ontario'

geolocator = Nominatim(user_agent="toronto_boroughs")
location = geolocator.geocode(address)
if location is None:
    # geocode() yields None when the service finds no match; fail here with a
    # clear message instead of an AttributeError on the next line.
    raise ValueError('Nominatim returned no result for {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


#### Extract only boroughs including the word 'Toronto' and visualize these boroughs

In [15]:
# Keep only the rows whose borough name contains 'Toronto', renumbered from 0.
in_toronto = df_post['Borough'].str.contains('Toronto')
toronto_data = df_post.loc[in_toronto].reset_index(drop=True)

In [60]:
# create map of Toronto using the geocoded latitude and longitude values
toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add one circle marker per neighbourhood; the popup reads "neighbourhood, borough"
for lat, lng, borough, neighborhood in zip(toronto_data['Latitude'],
                                           toronto_data['Longitude'],
                                           toronto_data['Borough'],
                                           toronto_data['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    # parse_html is a Popup option, so it is passed only to Popup above and not
    # to CircleMarker (matching the cluster map cell).
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(toronto)

toronto

In [33]:
# Preview the first five Toronto rows.
toronto_data.head(n=5)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


## 4. Using Foursquare to explore the first neighbourhood, 'The Beaches'

Setting up client credentials

In [34]:
# Foursquare credentials are read from the environment so they never have to be
# typed into, or saved with, the notebook. 'HIDDEN' remains the placeholder
# value when a variable is not set.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', 'HIDDEN')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', 'HIDDEN')
VERSION = '20190405'  # sent as the `v` query parameter of the API request

# Cell output is stored inside the notebook file, so report only whether the
# credentials were provided and never their values.
print('CLIENT_ID set: {}'.format(CLIENT_ID != 'HIDDEN'))
print('CLIENT_SECRET set: {}'.format(CLIENT_SECRET != 'HIDDEN'))

CLIENT_ID: HIDDEN
CLIENT_SECRET:HIDDEN


Get the coordinates of the first neighbourhood

In [35]:
# Row 0 of toronto_data is the neighbourhood explored in this section;
# pull out its name and coordinates as scalars.
first_row = 0
east_toronto_name = toronto_data.at[first_row, 'Neighbourhood']
east_toronto_la = toronto_data.at[first_row, 'Latitude']
east_toronto_lo = toronto_data.at[first_row, 'Longitude']

Use the Foursquare API to get the venues

In [36]:
radius = 300  # search radius sent as the `radius` query parameter
LIMIT = 100   # maximum number of venues requested via the `limit` parameter

# Query template for the Foursquare "explore" endpoint. It is kept separate so
# the same template yields both the real request URL and a masked copy for display.
explore_template = ('https://api.foursquare.com/v2/venues/explore'
                    '?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}')

# Real request URL (contains the credentials) - used by the request cell.
url = explore_template.format(
    CLIENT_ID,
    CLIENT_SECRET,
    east_toronto_la,
    east_toronto_lo,
    VERSION,
    radius,
    LIMIT)

# Display the URL with the credentials masked: the cell output is saved with
# the notebook and must not contain the client id or secret.
explore_template.format(
    '<CLIENT_ID>',
    '<CLIENT_SECRET>',
    east_toronto_la,
    east_toronto_lo,
    VERSION,
    radius,
    LIMIT)

'https://api.foursquare.com/v2/venues/explore?client_id=HIDDEN&client_secret=HIDDEN&ll=43.67635739999999,-79.2930312&v=20190405&radius=300&limit=100'

Clean up the results using the methods from the lab

In [37]:
# Fetch the venue recommendations. The timeout keeps the cell from hanging
# indefinitely, and raise_for_status() surfaces HTTP errors here rather than as
# a confusing KeyError when the payload is unpacked.
response = requests.get(url, timeout=10)
response.raise_for_status()
results = response.json()

In [38]:
# function that extracts the category of the venue
def get_category_type(row):
    """Return the name of the first category attached to a venue record.

    Parameters
    ----------
    row : mapping-like (e.g. dict or pandas Series)
        The category list is looked up under 'categories' first and, if that
        key is absent, under 'venue.categories'.

    Returns
    -------
    The 'name' entry of the first category, or None when the category list is
    empty or is not a sized object (e.g. None or NaN for a missing value).

    Raises
    ------
    KeyError
        If neither 'categories' nor 'venue.categories' is present.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; other errors propagate.
        categories_list = row['venue.categories']

    try:
        n_categories = len(categories_list)
    except TypeError:
        # None / NaN placeholder: the venue carries no category list.
        return None

    if n_categories == 0:
        return None
    return categories_list[0]['name']

In [39]:
# Pull the venue items out of the first group of the Foursquare response.
venues = results['response']['groups'][0]['items']

# Flatten the nested JSON into dotted column names, then keep only the
# name, categories and coordinate columns.
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = json_normalize(venues).loc[:, filtered_columns]

# Replace each categories list with the name of its first category.
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# Keep only the last dotted component of each column label
# (e.g. 'venue.location.lat' -> 'lat').
nearby_venues = nearby_venues.rename(columns=lambda col: col.split(".")[-1])

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Glen Stewart Park,Park,43.675278,-79.294647
1,Glen Stewart Ravine,Other Great Outdoors,43.6763,-79.294784
2,Glen Manor Ravine,Trail,43.676821,-79.293942


In [40]:
# Report how many venue rows came back.
print('{} venues were returned by Foursquare.'.format(len(nearby_venues)))

3 venues were returned by Foursquare.


### Visualize the venues around the neighbourhood

In [47]:


# create a close-up map centred on the first neighbourhood's coordinates
the_beaches = folium.Map(location=[east_toronto_la, east_toronto_lo], zoom_start=17)

# add one circle marker per venue; the popup reads "venue name, category"
for lat, lng, name, cat in zip(nearby_venues['lat'],
                               nearby_venues['lng'],
                               nearby_venues['name'],
                               nearby_venues['categories']):
    label = '{}, {}'.format(name, cat)
    label = folium.Popup(label, parse_html=True)
    # parse_html is a Popup option, so it is passed only to Popup above and not
    # to CircleMarker (matching the cluster map cell).
    folium.CircleMarker(
        [lat, lng],
        radius=15,
        popup=label,
        color='red',
        fill=True,
        fill_color='red',
        fill_opacity=0.3).add_to(the_beaches)

the_beaches

## 5. Cluster Analysis

In [48]:
k = 5  # number of clusters; the cluster map also uses it to size its colour palette

# cluster on the geographic coordinates only
toronto_cluster = toronto_data[['Latitude', 'Longitude']]

# run k-means clustering
# n_init is pinned to 10 (the long-standing default) because newer scikit-learn
# releases changed the default, which can change the resulting labels and
# triggers a FutureWarning on intermediate versions.
km_cluster = KMeans(n_clusters=k, random_state=0, n_init=10)
km_cluster.fit(toronto_cluster)

# check cluster labels generated for the first ten rows
km_cluster.labels_[0:10]

array([3, 3, 3, 3, 1, 1, 1, 1, 1, 1])

The results are returned through `.labels_` and then combined with the original Toronto dataframe

In [56]:
# Wrap the fitted labels in a single-column frame so they can be placed
# next to the neighbourhood table.
toronto_labels = pd.DataFrame({'Cluster Labels': km_cluster.labels_})
toronto_labels.head()

Unnamed: 0,Cluster Labels
0,3
1,3
2,3
3,3
4,1


In [53]:
# Place the label column beside the neighbourhood data (aligned on the index).
frames = [toronto_data, toronto_labels]
toronto_merged = pd.concat(frames, axis='columns')

toronto_merged.head() # the cluster labels are the last column

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels
0,M4E,East Toronto,The Beaches,43.676357,-79.293031,3
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188,3
2,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572,3
3,M4M,East Toronto,Studio District,43.659526,-79.340923,3
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,1


### Visualize the neighbourhoods of these boroughs using Folium

In [55]:
# create map centred on the geocoded Toronto coordinates
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: k evenly spaced rainbow colours as hex strings
colors_array = cm.rainbow(np.linspace(0, 1, k))
rainbow = [colors.rgb2hex(rgba) for rgba in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'],
                                  toronto_merged['Longitude'],
                                  toronto_merged['Neighbourhood'],
                                  toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # The labels come from KMeans(n_clusters=k) and run 0..k-1, so index the
    # palette directly; `cluster - 1` would send cluster 0 to index -1.
    cluster_color = rainbow[int(cluster)]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters