# Segmenting and Clustering of Toronto Neighborhoods Part 3

### Load relevant packages

In [2]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files
import os # library to read environment variables (used for the Foursquare credentials)

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

import requests

!conda install -c conda-forge BeautifulSoup4 --yes
from bs4 import BeautifulSoup

print('Libraries imported.')

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

Libraries imported.


### Recreate the DataFrame from Parts 1 and 2

In [3]:
def get_header(table):
    """Return the stripped text of each <th> cell in the table's first <tr>.

    `table` is any object offering BeautifulSoup-style `find` / `find_all`.
    """
    first_row = table.find("tr")
    return [cell.text.strip() for cell in first_row.find_all("th")]

def get_rows(table):
    """Return one list of stripped <td> texts per <tr>, skipping the first <tr>.

    The first <tr> is skipped because it is the row `get_header` reads.
    """
    body_rows = table.find_all("tr")[1:]
    return [
        [cell.text.strip() for cell in body_row.find_all("td")]
        for body_row in body_rows
    ]


# Download the Wikipedia page that lists the "M" postal codes.
postal_codes_response = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')
# Stop on an HTTP error status instead of parsing an error page below.
postal_codes_response.raise_for_status()
postal_codes_page = postal_codes_response.content
soup = BeautifulSoup(postal_codes_page,'html.parser')

# The first <table> on the page is used as the postal code table.
postal_codes_table = soup.find('table')
if postal_codes_table is None:
    raise ValueError('No <table> element found on the postal codes page.')
headers = get_header(postal_codes_table)
rows = get_rows(postal_codes_table)
df = pd.DataFrame(rows, columns=headers)

# Discard the rows whose borough is 'Not assigned'.
df = df[df['Borough'] != 'Not assigned']

# One row per (postal code, borough); its neighbourhoods are joined with ', '.
df = df.groupby(['Postal Code','Borough'], sort=False).agg({'Neighborhood': ', '.join}).reset_index()

# A neighbourhood recorded as 'Not assigned' takes the name of its borough.
df.loc[(df['Neighborhood'] == 'Not assigned'),'Neighborhood'] = df['Borough']

# Attach the latitude / longitude columns of the geospatial CSV by postal code.
df_geo = pd.read_csv('http://cocl.us/Geospatial_data')

df = pd.merge(df, df_geo[['Postal Code','Latitude', 'Longitude']], on='Postal Code')

df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


### Setup credentials for Foursquare

In [53]:
# Foursquare credentials are read from the environment so that real secrets never
# have to be saved in the notebook; the masked placeholders remain the fallback.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '############') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '########') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 30 # sent as the `limit` parameter of the venue search request

### Explore and Cluster a Neighborhood

In [35]:
address = 'Toronto, ON'

# Resolve the address to coordinates with the Nominatim geocoder.
geolocator = Nominatim(user_agent="explorer")
location = geolocator.geocode(address)
# geocode() returns None when nothing matches; fail with a clear message
# instead of an AttributeError on `.latitude`.
if location is None:
    raise ValueError('Could not geocode address: {}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [51]:
# create a map centred on (latitude, longitude); it is named `map_toronto`
# so that the builtin `map` is not shadowed for the rest of the notebook
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add one circle marker per row of df, with a "<neighborhood>, <borough>" popup
for lat, lng, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    # `parse_html` is an option of the Popup above, so it is not repeated on the marker
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)

# display map
map_toronto

### Call the Foursquare API and get the coffee shops for one of the neighborhoods

In [41]:
# search term and search radius used when building the venue search request
search_query, radius = 'Coffee', 500
print('{} .... OK!'.format(search_query))

Coffee .... OK!


In [42]:
# coordinates of postal code M5A, taken from its first matching row in df
m5a_coordinates = df.loc[df['Postal Code'] == 'M5A', ['Latitude', 'Longitude']]
latitude, longitude = m5a_coordinates.values[0]

In [52]:
# build the venue search request from the credentials, coordinates and search settings
url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, search_query, radius, LIMIT)

# send the request and keep the decoded JSON in `results`, which the parsing
# cell reads; without this the notebook fails on a fresh kernel (NameError)
response = requests.get(url)
response.raise_for_status()  # surface HTTP errors here rather than as a KeyError while parsing
results = response.json()

### Parse the results from the Foursquare API and generate a DataFrame

In [49]:
# pull the list of venues out of the API response
venues = results['response']['venues']

# flatten the nested venue records into a dataframe
dataframe = pd.json_normalize(venues)

# keep the name and categories columns, every "location."-prefixed column, and the id
location_columns = [label for label in dataframe.columns if label.startswith('location.')]
filtered_columns = ['name', 'categories', *location_columns, 'id']
dataframe_filtered = dataframe.loc[:, filtered_columns]

# function that extracts the category of the venue
def get_category_type(row):
    """Return the name of the first category listed for a venue row.

    The category list is read from `row['categories']`; when that key is
    missing, `row['venue.categories']` is used instead. Only the missing-key
    case triggers the fallback, so any other error propagates to the caller
    instead of being silently swallowed.

    Returns None when the category list is empty.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        categories_list = row['venue.categories']

    if len(categories_list) == 0:
        return None
    return categories_list[0]['name']

# replace each row's category list with the value get_category_type returns for it
dataframe_filtered['categories'] = dataframe_filtered.apply(get_category_type, axis='columns')

# drop the dotted prefix of every column name (e.g. "location.lat" becomes "lat")
dataframe_filtered.columns = [label.rsplit('.', 1)[-1] for label in dataframe_filtered.columns]

dataframe_filtered.head()

Unnamed: 0,name,categories,address,lat,lng,labeledLatLngs,distance,postalCode,cc,city,state,country,formattedAddress,crossStreet,id
0,Arena Coffee Bar,Coffee Shop,15 Tank House Lane,43.65028,-79.35886,"[{'label': 'display', 'lat': 43.65028, 'lng': ...",465,M5A 3C4,CA,Toronto,ON,Canada,"[15 Tank House Lane, Toronto ON M5A 3C4, Canada]",,58bdc3a23ef0f629212f1f70
1,Tandem Coffee,Coffee Shop,368 King St E,43.653559,-79.361809,"[{'label': 'display', 'lat': 43.65355870959944...",122,,CA,Toronto,ON,Canada,"[368 King St E (at Trinity St), Toronto ON, Ca...",at Trinity St,53b8466a498e83df908c3f21
2,Coffee Time,Coffee Shop,142 Parliament St.,43.654541,-79.364147,"[{'label': 'display', 'lat': 43.65454122520666...",284,,CA,Toronto,ON,Canada,"[142 Parliament St. (Richmond St.), Toronto ON...",Richmond St.,4d9f1825a77d816e4501fa08
3,Coffee Enterprise,Tech Startup,550 Queen Street East,43.65729,-79.357592,"[{'label': 'display', 'lat': 43.65729010071037...",417,,CA,Toronto,ON,Canada,"[550 Queen Street East, Toronto ON, Canada]",,54cb8e59498e14b4a8ab2aba
4,Coffee Mobile - Brand Partners,Tech Startup,550 Queen Street East,43.657487,-79.35693,"[{'label': 'display', 'lat': 43.6574868268064,...",467,,CA,Toronto,ON,Canada,"[550 Queen Street East (River), Toronto ON, Ca...",River,55fc340d498e3e59b6211dc0


### Display a map of the coffee shops

In [50]:
# generate a map centred on (latitude, longitude)
venues_map = folium.Map(location=[latitude, longitude], zoom_start=13)

# add a red circle marker at (latitude, longitude), the `ll` point of the venue search request
folium.features.CircleMarker(
    [latitude, longitude],
    radius=10,
    color='red',
    fill = True,
    fill_color = 'red',
    fill_opacity = 0.6
).add_to(venues_map)

# draw every venue as a small blue circle whose popup shows the venue's category
venue_marker_style = dict(radius=5, color='blue', fill=True, fill_color='blue', fill_opacity=0.6)
venue_points = zip(dataframe_filtered.lat, dataframe_filtered.lng, dataframe_filtered.categories)
for venue_lat, venue_lng, category in venue_points:
    venue_marker = folium.features.CircleMarker(
        [venue_lat, venue_lng],
        popup=category,
        **venue_marker_style
    )
    venue_marker.add_to(venues_map)

# display map
venues_map