# Capstone Project: Segmenting and Clustering Neighborhoods in Toronto

## Part 3 - Data Analysis

**NOTE**
For this analysis I will use only the central Toronto areas, together with the dataset containing the coordinates I retrieved from LocationIQ.

---
Import Modules

In [1]:
import json # library to handle JSON files
import os # read API credentials from environment variables

import numpy as np # library to handle data in a vectorized manner
import pandas as pd # library for data analsysis

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
from geopy.geocoders import Nominatim

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library

---
Define the helper functions that we are going to use

In [2]:
def get_category_type(row):
    """Return the name of the first category attached to a venue row.

    Parameters
    ----------
    row : mapping-like (e.g. a dict or a pandas Series)
        Must expose the venue categories under the key ``'categories'`` or,
        failing that, ``'venue.categories'``.

    Returns
    -------
    The ``'name'`` entry of the first category, or ``None`` when the
    categories value is empty or missing (``[]`` / ``None``).

    Raises
    ------
    KeyError
        If neither ``'categories'`` nor ``'venue.categories'`` is present.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error is a real
        # problem and must not be silently swallowed.
        categories_list = row['venue.categories']

    # Falsy check covers both an empty list and None.
    if not categories_list:
        return None
    return categories_list[0]['name']

---
Declare the static configuration values

In [3]:
# Foursquare API credentials are read from the environment so that secrets are
# never stored in (or leaked through) the notebook. Set FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET before starting the kernel; when they are unset the
# values fall back to empty strings.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
VERSION = '20180604' # value sent as the `v` query parameter of the Foursquare request

---
Now we map the data and narrow the field we want to look at

In [4]:
# Read the neighbourhood table with its coordinates, then discard the
# 'Unnamed: 0' column that the CSV carries.
df_T = pd.read_csv('TNC_myLatLon.csv')
df_T = df_T.drop(columns='Unnamed: 0')
df_T.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.7588,-79.320197
1,M4A,North York,Victoria Village,43.732658,-79.311189
2,M5A,Downtown Toronto,Regent Park,43.659933,-79.360509
3,M5A,Downtown Toronto,Harbourfront,43.654652,-79.381164
4,M6A,North York,Lawrence Manor,43.722079,-79.437507


In [5]:
address = 'Toronto, Ontario, Canada'

geolocator = Nominatim(user_agent="foursquare_agent")
location = geolocator.geocode(address)
# geocode() gives back None when the service finds no match; fail with a clear
# message instead of an AttributeError on the lines below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [10]:
# Base map centred on the current latitude/longitude values.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# One circle marker per row of df_T, with a "<neighbourhood>, <postal code>" popup.
marker_fields = df_T[['Latitude', 'Longitude', 'Postal Code', 'Neighbourhood']]
for lat, lng, postal_code, neighbourhood in marker_fields.itertuples(index=False, name=None):
    popup_text = '{}, {}'.format(neighbourhood, postal_code)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup(popup_text, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(map_toronto)

map_toronto

In [17]:
# Keep the rows whose borough name contains "Toronto", then count rows per borough.
is_toronto_borough = df_T['Borough'].str.contains("Toronto")
df_TC = df_T[is_toronto_borough]
df_TC.groupby('Borough').count()

Unnamed: 0_level_0,Postal Code,Neighbourhood,Latitude,Longitude
Borough,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Central Toronto,18,18,18,18
Downtown Toronto,38,38,38,38
East Toronto,6,6,6,6
West Toronto,13,13,13,13


In [16]:
# Rebuild the map at a closer zoom for the filtered df_TC rows.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=12)

# One circle marker per row of df_TC, with a "<neighbourhood>, <postal code>" popup.
marker_fields = df_TC[['Latitude', 'Longitude', 'Postal Code', 'Neighbourhood']]
for lat, lng, postal_code, neighbourhood in marker_fields.itertuples(index=False, name=None):
    popup_text = '{}, {}'.format(neighbourhood, postal_code)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=15,
        popup=folium.Popup(popup_text, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(map_toronto)

map_toronto

In [21]:
# Narrow the selection to the rows whose borough is exactly 'Downtown Toronto'.
downtown_mask = df_T['Borough'].eq('Downtown Toronto')
df_TC = df_T[downtown_mask]
df_TC.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
2,M5A,Downtown Toronto,Regent Park,43.659933,-79.360509
3,M5A,Downtown Toronto,Harbourfront,43.654652,-79.381164
6,M7A,Downtown Toronto,Queen's Park,43.660114,-79.389977
7,M7A,Downtown Toronto,Ontario Provincial Government,43.662804,-79.388735
15,M5B,Downtown Toronto,Garden District,43.652348,-79.378992


In [22]:
# Rebuild the map at street-level zoom for the current df_TC rows.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=14)

# One circle marker per row of df_TC, with a "<neighbourhood>, <postal code>" popup.
marker_fields = df_TC[['Latitude', 'Longitude', 'Postal Code', 'Neighbourhood']]
for lat, lng, postal_code, neighbourhood in marker_fields.itertuples(index=False, name=None):
    popup_text = '{}, {}'.format(neighbourhood, postal_code)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=15,
        popup=folium.Popup(popup_text, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(map_toronto)

map_toronto

---
Let's say we are new University of Toronto students and want to see what's around the university

In [24]:
address = 'University of Toronto, Toronto, Ontario, CA'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
# geocode() gives back None when the service finds no match; fail with a clear
# message instead of an AttributeError on the lines below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
# NOTE: this overwrites the latitude/longitude values set by the earlier geocoding cell.
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of UoT are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of UoT are 43.663461999999996, -79.39775965337452.


In [26]:
# Query the Foursquare "explore" endpoint around the current latitude/longitude
# and flatten the returned venues into a small table (name, category, lat, lng).
radius = 500  # value sent as the `radius` query parameter
LIMIT = 100   # value sent as the `limit` query parameter
url = (
    'https://api.foursquare.com/v2/venues/explore'
    '?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'
).format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, radius, LIMIT)

# Bound the wait and surface HTTP errors here rather than as a confusing
# KeyError when the JSON payload is indexed below.
response = requests.get(url, timeout=30)
response.raise_for_status()
results = response.json()

venues = results['response']['groups'][0]['items']
# pd.json_normalize replaces the deprecated pandas.io.json.json_normalize.
nearby_venues = pd.json_normalize(venues) # flatten JSON

filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
# Explicit copy so the column assignment below never targets a view of the full frame.
nearby_venues = nearby_venues.loc[:, filtered_columns].copy()
# Replace each row's category list with the name of its first category.
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)
# Keep only the last dotted component of each column name (e.g. 'venue.location.lat' -> 'lat').
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]
nearby_venues.head()

  nearby_venues = json_normalize(venues) # flatten JSON


Unnamed: 0,name,categories,lat,lng
0,Hart House Theatre,Theater,43.663571,-79.394616
1,Philosopher's Walk,Park,43.666894,-79.395597
2,Yasu,Japanese Restaurant,43.662837,-79.403217
3,Queen's Park,Park,43.663946,-79.39218
4,Innis Cafe,Café,43.665401,-79.399715


In [27]:
# Report how many rows (venues) the Foursquare query produced.
venue_count = len(nearby_venues)
print('{} venues were returned by Foursquare.'.format(venue_count))

31 venues were returned by Foursquare.
