# Segmenting and Clustering Neighborhoods in Scarborough, Toronto

## 1. Webscraping, Creating Dataframe 

In [1]:
!pip install bs4
#!pip install plotly



In [2]:
# import libraries
import pandas as pd
import requests
from bs4 import BeautifulSoup
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [3]:
# Download the Wikipedia page https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M
# and keep the response text in `html_data` for parsing in the next cell.
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
response = requests.get(url, timeout=30)
# Fail fast on an HTTP error instead of parsing an error page as if it were the table.
response.raise_for_status()
html_data = response.text

In [4]:
# Build the BeautifulSoup document tree from the downloaded HTML, using the html5lib parser.
soup = BeautifulSoup(html_data, features='html5lib')

In [5]:
# Build one record per assigned postal code from the first <table> on the page.
# Each <td> carries the postal code in a <p> tag and a <span> whose text looks like
# "Borough(Neighborhood / Neighborhood ...)". Cells whose span text is 'Not assigned'
# are skipped. A postal code that covers several neighborhoods becomes a single
# record with the names separated by commas (e.g. M5A -> "Regent Park, Harbourfront").
table_contents = []
table = soup.find('table')
if table is None:
    raise ValueError('No <table> element found in the downloaded page.')

for td in table.find_all('td'):
    # Skip cells without the expected <p>/<span> markup rather than failing
    # with AttributeError on None.
    if td.p is None or td.span is None:
        continue

    span_text = td.span.text
    if span_text == 'Not assigned':
        continue

    # Text before the first "(" is the borough; text between the first and
    # second "(" holds the neighborhood list.
    parts = span_text.split('(')
    borough = parts[0]
    if len(parts) > 1:
        neighborhood = parts[1].strip(')').replace(' /', ',').replace(')', ' ').strip(' ')
    else:
        # No parenthesised list in this cell: fall back to the borough name
        # instead of raising IndexError.
        neighborhood = borough.strip(' ')

    table_contents.append({
        'PostalCode': td.p.text[:3],  # first three characters of the <p> text
        'Borough': borough,
        'Neighborhood': neighborhood,
    })

In [6]:
# Turn the scraped records into a DataFrame (one row per record) and preview it.
df = pd.DataFrame(data=table_contents)
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government


In [18]:
# Dimensions of the scraped table as (rows, columns).
df.shape

(103, 3)

In [8]:
# Keep only the rows whose Borough is exactly 'Scarborough' and renumber them from zero.
is_scarborough = df['Borough'].eq('Scarborough')
scarborough = df.loc[is_scarborough].reset_index(drop=True)
scarborough

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park"
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge"
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


##  2. Create Dataframe including Latitude and longitude

In [9]:
# Read Scarborough.csv (postal code coordinates) from IBM Cloud Object Storage into df_scar.
import os, types
import pandas as pd
from botocore.client import Config
import ibm_boto3

# Stub that is bound onto the streaming body below when it lacks __iter__.
def __iter__(self): return 0

# @hidden_cell
# The following code accesses a file in your IBM Cloud Object Storage.
# The API key is read from the IBM_API_KEY_ID environment variable so that no
# credential is stored in the notebook; a missing variable raises KeyError.
# NOTE(review): the key that used to be embedded here has been exposed and should be revoked.

# Pick the object-storage endpoint according to RUNTIME_ENV_LOCATION_TYPE.
if os.environ.get('RUNTIME_ENV_LOCATION_TYPE') == 'external':
    endpoint_3f2c2ed85b9741a481eb081052fbb645 = 'https://s3.ap-geo.objectstorage.softlayer.net'
else:
    endpoint_3f2c2ed85b9741a481eb081052fbb645 = 'https://s3.ap-geo.objectstorage.service.networklayer.com'

client_3f2c2ed85b9741a481eb081052fbb645 = ibm_boto3.client(service_name='s3',
    ibm_api_key_id=os.environ['IBM_API_KEY_ID'],
    ibm_auth_endpoint="https://iam.cloud.ibm.com/oidc/token",
    config=Config(signature_version='oauth'),
    endpoint_url=endpoint_3f2c2ed85b9741a481eb081052fbb645)

body = client_3f2c2ed85b9741a481eb081052fbb645.get_object(Bucket='nhux27scapstoneprojectnotebook-donotdelete-pr-eb1tbwfm43fobt',Key='Scarborough.csv')['Body']
# add missing __iter__ method, so pandas accepts body as file-like object
if not hasattr(body, "__iter__"): body.__iter__ = types.MethodType( __iter__, body )

df_scar = pd.read_csv(body)
df_scar.head()


Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.8113,-79.193
1,M1C,43.7878,-79.1564
2,M1E,43.7678,-79.1866
3,M1G,43.7712,-79.2144
4,M1H,43.7686,-79.2389


In [18]:
# Rename column "PostalCode" to "Postal Code" so it matches the key column of df_scar.
# The name is rebound to the renamed copy (no inplace=True), so the frame shown by
# the earlier cell is not mutated behind the reader's back.
scarborough = scarborough.rename(columns={'PostalCode': 'Postal Code'})
scarborough.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [19]:
# Attach the df_scar columns (Latitude, Longitude) to each Scarborough row,
# matching on 'Postal Code'.
coordinates_by_code = df_scar.set_index('Postal Code')
scarbo = scarborough.join(coordinates_by_code, on='Postal Code')
scarbo.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.8113,-79.193
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.7878,-79.1564
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.7678,-79.1866
3,M1G,Scarborough,Woburn,43.7712,-79.2144
4,M1H,Scarborough,Cedarbrae,43.7686,-79.2389


## 3. Segmenting and Clustering Neighborhoods in Scarborough, Toronto

In [20]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

Libraries imported.


#### Define Foursquare Credential

In [21]:
# Foursquare credentials are read from environment variables so that no secret is
# stored in, or printed by, the notebook. A missing variable raises KeyError here
# instead of failing later inside an API call.
# NOTE(review): the values that used to be embedded in this cell have been exposed
# and should be rotated.
CLIENT_ID = os.environ['FOURSQUARE_CLIENT_ID'] # your Foursquare ID
CLIENT_SECRET = os.environ['FOURSQUARE_CLIENT_SECRET'] # your Foursquare Secret
ACCESS_TOKEN = os.environ['FOURSQUARE_ACCESS_TOKEN'] # your FourSquare Access Token
VERSION = '20180604' # sent as the `v` query parameter of each request
LIMIT = 100 # sent as the `limit` query parameter of each request
print('Foursquare credentials loaded from the environment.')

Your credentails:
CLIENT_ID: <redacted>
CLIENT_SECRET:<redacted>


##### Use geopy library to get the latitude and longitude values of Toronto City

##### Cluster neighborhoods in Scarborough

##### Let's get the geographical coordinates of Scarborough

In [23]:
# Geocode the address with Nominatim and keep the resulting coordinates in
# `latitude` / `longitude` for centring the maps below.
address = 'Scarborough, CA'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
# geocode() yields None when nothing matches; stop with a clear message instead
# of an AttributeError on the next line.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Scarborough are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Scarborough are 43.7729744, -79.2576479.


##### Let's visualize Scarborough and the neighborhoods in it

In [24]:
# Base map centred on the geocoded Scarborough coordinates.
map_scarborough = folium.Map(location=[latitude, longitude], zoom_start=11)

# Draw one blue circle per row of `scarbo`; its popup shows the neighborhood names.
marker_rows = scarbo[['Latitude', 'Longitude', 'Neighborhood']].itertuples(index=False, name=None)
for lat, lng, neighborhood in marker_rows:
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup(neighborhood, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_scarborough)

map_scarborough

### Explore Neighborhoods in Scarborough

In [25]:
# Define function to get nearby venues of all neighborhoods in Scarborough
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query the Foursquare `venues/explore` endpoint once per location and tabulate the venues.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Walked in parallel with zip(); each triple describes one location.
    radius : int, default 500
        Sent as the `radius` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        The frame is empty (but keeps these columns) when no venue is returned.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.

    Notes
    -----
    Reads the module-level CLIENT_ID, CLIENT_SECRET, ACCESS_TOKEN, VERSION and
    LIMIT, and prints each name as it is processed.
    """
    venue_columns = ['Neighborhood',
                     'Neighborhood Latitude',
                     'Neighborhood Longitude',
                     'Venue',
                     'Venue Latitude',
                     'Venue Longitude',
                     'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # Let requests build (and URL-encode) the query string.
        params = {
            'client_id': CLIENT_ID,
            'oauth_token': ACCESS_TOKEN,
            'client_secret': CLIENT_SECRET,
            'll': '{},{}'.format(lat, lng),
            'v': VERSION,
            'radius': radius,
            'limit': LIMIT,
        }

        # make the GET request; the timeout keeps a stalled connection from hanging the notebook
        response = requests.get('https://api.foursquare.com/v2/venues/explore',
                                params=params, timeout=30)
        response.raise_for_status()

        groups = response.json().get('response', {}).get('groups', [])
        items = groups[0]['items'] if groups else []

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            # A venue without any category gets None instead of raising IndexError.
            categories = venue.get('categories') or []
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # Passing the column names to the constructor keeps the header valid even
    # when no venue was found (assigning .columns afterwards fails on an empty frame).
    return pd.DataFrame(venue_rows, columns=venue_columns)
print('done')

done


In [26]:
# Run getNearbyVenues on every row of `scarbo` and collect the result in a new
# dataframe called scarborough_venues.
scarborough_venues = getNearbyVenues(
    names=scarbo['Neighborhood'],
    latitudes=scarbo['Latitude'],
    longitudes=scarbo['Longitude'],
)

Malvern, Rouge
Rouge Hill, Port Union, Highland Creek
Guildwood, Morningside, West Hill
Woburn
Cedarbrae
Scarborough Village
Kennedy Park, Ionview, East Birchmount Park
Golden Mile, Clairlea, Oakridge
Cliffside, Cliffcrest, Scarborough Village West
Birch Cliff, Cliffside West
Dorset Park, Wexford Heights, Scarborough Town Centre
Wexford, Maryvale
Agincourt
Clarks Corners, Tam O'Shanter, Sullivan
Milliken, Agincourt North, Steeles East, L'Amoreaux East
Steeles West, L'Amoreaux West
Upper Rouge


In [60]:
# Print the (rows, columns) shape, then display the venues table.
print(scarborough_venues.shape)
scarborough_venues

(212, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Malvern, Rouge",43.8113,-79.193,Canadian Appliance Source Whitby,43.808353,-79.191331,Home Service
1,"Malvern, Rouge",43.8113,-79.193,R & K Woodworking Specialists Inc,43.808233,-79.196857,Construction & Landscaping
2,"Rouge Hill, Port Union, Highland Creek",43.7878,-79.1564,Fox and Fiddle,43.789082,-79.154459,Bar
3,"Rouge Hill, Port Union, Highland Creek",43.7878,-79.1564,RIGHT WAY TO GOLF,43.785177,-79.161108,Golf Course
4,"Rouge Hill, Port Union, Highland Creek",43.7878,-79.1564,Scarborough Historical Society,43.788755,-79.162438,History Museum
5,"Guildwood, Morningside, West Hill",43.7678,-79.1866,Chick-N-Joy,43.768752,-79.187982,Fried Chicken Joint
6,"Guildwood, Morningside, West Hill",43.7678,-79.1866,Little Caesars Pizza,43.769046,-79.184386,Pizza Place
7,"Guildwood, Morningside, West Hill",43.7678,-79.1866,Bulk Barn,43.771342,-79.184341,Food & Drink Shop
8,"Guildwood, Morningside, West Hill",43.7678,-79.1866,LCBO,43.771462,-79.184384,Liquor Store
9,"Guildwood, Morningside, West Hill",43.7678,-79.1866,Booster Juice,43.770668,-79.18415,Smoothie Shop


In [68]:
# Spot check: every venue whose category is exactly 'Optical Shop', renumbered from zero.
is_optical_shop = scarborough_venues['Venue Category'] == 'Optical Shop'
x = scarborough_venues[is_optical_shop].reset_index(drop=True)
x

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Guildwood, Morningside, West Hill",43.7678,-79.1866,Hakim Optical,43.769721,-79.187106,Optical Shop


In [28]:
# Let's check how many venues were returned for each neighborhood
# (count() gives the number of non-null entries per column within each Neighborhood group).
scarborough_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,8,8,8,8,8,8
"Birch Cliff, Cliffside West",4,4,4,4,4,4
Cedarbrae,4,4,4,4,4,4
"Clarks Corners, Tam O'Shanter, Sullivan",21,21,21,21,21,21
"Cliffside, Cliffcrest, Scarborough Village West",16,16,16,16,16,16
"Dorset Park, Wexford Heights, Scarborough Town Centre",6,6,6,6,6,6
"Golden Mile, Clairlea, Oakridge",13,13,13,13,13,13
"Guildwood, Morningside, West Hill",60,60,60,60,60,60
"Kennedy Park, Ionview, East Birchmount Park",26,26,26,26,26,26
"Malvern, Rouge",2,2,2,2,2,2


In [29]:
# Count the distinct values in the 'Venue Category' column and report the total.
category_count = len(scarborough_venues['Venue Category'].unique())
print('There are {} uniques categories.'.format(category_count))

There are 95 uniques categories.


### Analyze Each Neighborhood


In [30]:

# one hot encoding: one indicator column per venue category, one row per venue
category_dummies = pd.get_dummies(scarborough_venues[['Venue Category']], prefix="", prefix_sep="")

# A venue category literally called "Neighborhood" would collide with the label
# column added below (the indicator would be overwritten and lost), so give that
# indicator a distinct name first.
if 'Neighborhood' in category_dummies.columns:
    category_dummies = category_dummies.rename(
        columns={'Neighborhood': 'Neighborhood (Venue Category)'})

# add the neighborhood column back as the first column of the dataframe
scarborough_onehot = category_dummies.copy()
scarborough_onehot.insert(0, 'Neighborhood', scarborough_venues['Neighborhood'])

scarborough_onehot.head()

Unnamed: 0,Neighborhood,ATM,Auto Garage,Badminton Court,Bakery,Bank,Bar,Beer Store,Bistro,Breakfast Spot,Brewery,Bubble Tea Shop,Burger Joint,Bus Line,Bus Station,Bus Stop,Business Service,Café,Chinese Restaurant,Clothing Store,Coffee Shop,College Stadium,Construction & Landscaping,Convenience Store,Cosmetics Shop,Department Store,Discount Store,Donut Shop,Electronics Store,Fast Food Restaurant,Filipino Restaurant,Financial or Legal Service,Fireworks Store,Flower Shop,Food & Drink Shop,Fried Chicken Joint,Furniture / Home Store,Gaming Cafe,Gas Station,General Entertainment,Gift Shop,Golf Course,Greek Restaurant,Grocery Store,Gym,Gym Pool,Hardware Store,History Museum,Hobby Shop,Hockey Arena,Home Service,Ice Cream Shop,Insurance Office,Intersection,Italian Restaurant,Jewelry Store,Latin American Restaurant,Laundromat,Light Rail Station,Liquor Store,Lounge,Medical Center,Medical Supply Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Newsagent,Noodle House,Optical Shop,Other Great Outdoors,Other Repair Shop,Park,Pet Store,Pharmacy,Pizza Place,Rental Car Location,Rental Service,Restaurant,Salon / Barbershop,Sandwich Place,Shanghai Restaurant,Shopping Mall,Skating Rink,Smoothie Shop,Soccer Field,Spa,Sports Bar,Supermarket,Thai Restaurant,Thrift / Vintage Store,Trail,Train Station,Video Game Store,Wine Shop
0,"Malvern, Rouge",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"Malvern, Rouge",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,"Rouge Hill, Port Union, Highland Creek",0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,"Rouge Hill, Port Union, Highland Creek",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,"Rouge Hill, Port Union, Highland Creek",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [31]:
# Dimensions of the one-hot table as (rows, columns).
scarborough_onehot.shape

(212, 96)

In [32]:
# Collapse the one-hot rows to one row per neighborhood; each cell becomes the mean
# of that category's indicator, i.e. its frequency of occurrence in the neighborhood.
scarborough_grouped = scarborough_onehot.groupby('Neighborhood', as_index=False).mean()

In [33]:
# Preview the first rows of the per-neighborhood category frequencies.
scarborough_grouped.head()

Unnamed: 0,Neighborhood,ATM,Auto Garage,Badminton Court,Bakery,Bank,Bar,Beer Store,Bistro,Breakfast Spot,Brewery,Bubble Tea Shop,Burger Joint,Bus Line,Bus Station,Bus Stop,Business Service,Café,Chinese Restaurant,Clothing Store,Coffee Shop,College Stadium,Construction & Landscaping,Convenience Store,Cosmetics Shop,Department Store,Discount Store,Donut Shop,Electronics Store,Fast Food Restaurant,Filipino Restaurant,Financial or Legal Service,Fireworks Store,Flower Shop,Food & Drink Shop,Fried Chicken Joint,Furniture / Home Store,Gaming Cafe,Gas Station,General Entertainment,Gift Shop,Golf Course,Greek Restaurant,Grocery Store,Gym,Gym Pool,Hardware Store,History Museum,Hobby Shop,Hockey Arena,Home Service,Ice Cream Shop,Insurance Office,Intersection,Italian Restaurant,Jewelry Store,Latin American Restaurant,Laundromat,Light Rail Station,Liquor Store,Lounge,Medical Center,Medical Supply Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Newsagent,Noodle House,Optical Shop,Other Great Outdoors,Other Repair Shop,Park,Pet Store,Pharmacy,Pizza Place,Rental Car Location,Rental Service,Restaurant,Salon / Barbershop,Sandwich Place,Shanghai Restaurant,Shopping Mall,Skating Rink,Smoothie Shop,Soccer Field,Spa,Sports Bar,Supermarket,Thai Restaurant,Thrift / Vintage Store,Trail,Train Station,Video Game Store,Wine Shop
0,Agincourt,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Birch Cliff, Cliffside West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Cedarbrae,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0
3,"Clarks Corners, Tam O'Shanter, Sullivan",0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.047619,0.0,0.047619,0.0,0.047619,0.0,0.0,0.095238,0.0,0.0,0.0,0.0,0.0,0.095238,0.0,0.0,0.0,0.047619,0.0,0.047619,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.095238,0.047619,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.047619,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0
4,"Cliffside, Cliffcrest, Scarborough Village West",0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0625,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [34]:
# Print, for every neighborhood, its 5 most frequent venue categories.
num_top_venues = 5

for hood in scarborough_grouped['Neighborhood']:
    print("----"+hood+"----")

    # Transpose the neighborhood's row into a two-column (venue, freq) table.
    hood_profile = scarborough_grouped.loc[scarborough_grouped['Neighborhood'] == hood].T.reset_index()
    hood_profile.columns = ['venue','freq']

    ranked = (
        hood_profile.iloc[1:]            # drop the leading 'Neighborhood' entry
        .astype({'freq': float})
        .round({'freq': 2})
        .sort_values('freq', ascending=False)
        .reset_index(drop=True)
    )
    print(ranked.head(num_top_venues))
    print('\n')

----Agincourt----
                       venue  freq
0        Shanghai Restaurant  0.12
1            Badminton Court  0.12
2             Hardware Store  0.12
3                  Newsagent  0.12
4  Latin American Restaurant  0.12


----Birch Cliff, Cliffside West----
                   venue  freq
0        College Stadium  0.25
1                   Café  0.25
2  General Entertainment  0.25
3           Skating Rink  0.25
4                    ATM  0.00


----Cedarbrae----
                        venue  freq
0                       Trail  0.25
1  Construction & Landscaping  0.25
2                      Lounge  0.25
3                 Gaming Cafe  0.25
4                         ATM  0.00


----Clarks Corners, Tam O'Shanter, Sullivan----
                  venue  freq
0           Pizza Place  0.10
1  Fast Food Restaurant  0.10
2     Convenience Store  0.10
3              Bus Stop  0.05
4           Gas Station  0.05


----Cliffside, Cliffcrest, Scarborough Village West----
            venue  freq


In [None]:
##Let's put that into a pandas dataframe
##First, let's write a function to sort the venues in descending order.

In [35]:
def return_most_common_venues(row, num_top_venues):
    """Return the index labels of the largest values in `row`, ignoring its first entry.

    The first element of `row` is skipped, the remaining values are sorted in
    descending order, and the labels of the first `num_top_venues` of them are
    returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

In [37]:
# Create the new dataframe and display the top 10 venues for each neighborhood.
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues: "1st", "2nd", "3rd", then "Nth"
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    # Explicit bounds check instead of a bare `except:` that would also hide
    # unrelated errors.
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# create a new dataframe with one row per neighborhood
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = scarborough_grouped['Neighborhood']

# fill the ranking columns row by row from the grouped frequencies
for ind in np.arange(scarborough_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(scarborough_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Shanghai Restaurant,Badminton Court,Hardware Store,Newsagent,Latin American Restaurant,Breakfast Spot,Construction & Landscaping,Skating Rink,ATM,Optical Shop
1,"Birch Cliff, Cliffside West",College Stadium,Café,General Entertainment,Skating Rink,ATM,Medical Supply Store,Optical Shop,Noodle House,Newsagent,Mobile Phone Shop
2,Cedarbrae,Trail,Construction & Landscaping,Lounge,Gaming Cafe,ATM,Medical Supply Store,Optical Shop,Noodle House,Newsagent,Mobile Phone Shop
3,"Clarks Corners, Tam O'Shanter, Sullivan",Pizza Place,Fast Food Restaurant,Convenience Store,Bus Stop,Gas Station,Fried Chicken Joint,Flower Shop,Pharmacy,Italian Restaurant,Rental Car Location
4,"Cliffside, Cliffcrest, Scarborough Village West",Ice Cream Shop,Gift Shop,Pharmacy,Spa,Liquor Store,Electronics Store,Discount Store,Pizza Place,Coffee Shop,Sandwich Place


###  Cluster Neighborhoods


##### Run k-means to cluster the neighborhoods into 5 clusters.

In [38]:
# set number of clusters
kclusters = 5

# Feature matrix: the category frequencies without the label column.
# `axis` is passed by keyword because the positional form drop(label, 1) is
# rejected by pandas 2.x.
scarborough_grouped_clustering = scarborough_grouped.drop('Neighborhood', axis=1)

# run k-means clustering; random_state fixes the seed and n_init is pinned so the
# number of restarts does not depend on the installed scikit-learn's default
kmeans = KMeans(n_clusters=kclusters, random_state=0, n_init=10).fit(scarborough_grouped_clustering)

# check cluster labels generated for the first rows of the dataframe
kmeans.labels_[0:10]

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 2], dtype=int32)

##### Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

In [42]:

# add clustering labels as the first column; overwrite them when the column is
# already there so that re-running this cell does not raise
# "cannot insert Cluster Labels, already exists"
if 'Cluster Labels' in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted['Cluster Labels'] = kmeans.labels_
else:
    neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# join the per-neighborhood labels and top venues onto scarbo, which carries the
# latitude/longitude of each neighborhood; rows of scarbo without a match in
# neighborhoods_venues_sorted get NaN in the added columns
scarborough_merged = scarbo.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

scarborough_merged.head() # check the last columns!

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,Cluster Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Malvern, Rouge",43.8113,-79.193,2.0,2.0,Home Service,Construction & Landscaping,Other Repair Shop,Optical Shop,Noodle House,Newsagent,Mobile Phone Shop,Miscellaneous Shop,Middle Eastern Restaurant,Mexican Restaurant
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.7878,-79.1564,0.0,0.0,History Museum,Bar,Golf Course,ATM,Medical Center,Optical Shop,Noodle House,Newsagent,Mobile Phone Shop,Miscellaneous Shop
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.7678,-79.1866,1.0,1.0,Pizza Place,Electronics Store,Grocery Store,Restaurant,Fast Food Restaurant,Medical Center,Coffee Shop,Pharmacy,Greek Restaurant,Bank
3,M1G,Scarborough,Woburn,43.7712,-79.2144,4.0,4.0,Convenience Store,Insurance Office,Other Repair Shop,ATM,Medical Supply Store,Optical Shop,Noodle House,Newsagent,Mobile Phone Shop,Miscellaneous Shop
4,M1H,Scarborough,Cedarbrae,43.7686,-79.2389,1.0,1.0,Trail,Construction & Landscaping,Lounge,Gaming Cafe,ATM,Medical Supply Store,Optical Shop,Noodle House,Newsagent,Mobile Phone Shop


In [46]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster label
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]
unclustered_color = '#808080'  # grey for rows that carry no cluster label

# add markers to the map, coloured by cluster so the groups can be told apart
markers_colors = []
for lat, lon, poi, cluster in zip(scarborough_merged['Latitude'], scarborough_merged['Longitude'], scarborough_merged['Neighborhood'], scarborough_merged['Cluster Labels']):
    # The label is missing (NaN) for rows that found no match in the join that
    # built scarborough_merged; such rows cannot index the palette.
    if pd.isna(cluster):
        cluster_text = 'not assigned'
        marker_color = unclustered_color
    else:
        cluster_text = str(int(cluster))
        marker_color = rainbow[int(cluster)]
    markers_colors.append(marker_color)

    label = folium.Popup(str(poi) + ' Cluster ' + cluster_text, parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

### Examine Clusters

###### Cluster 1 

In [54]:
# Rows with cluster label 0, showing column 2 (Neighborhood) plus every column from position 5 onward.
cluster_columns = scarborough_merged.columns[[2] + list(range(5, scarborough_merged.shape[1]))]
scarborough_merged.loc[scarborough_merged['Cluster Labels'] == 0, cluster_columns]

Unnamed: 0,Neighborhood,Cluster Labels,Cluster Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,"Rouge Hill, Port Union, Highland Creek",0.0,0.0,History Museum,Bar,Golf Course,ATM,Medical Center,Optical Shop,Noodle House,Newsagent,Mobile Phone Shop,Miscellaneous Shop


###### Cluster 2 

In [55]:
# Rows with cluster label 1, showing column 2 (Neighborhood) plus every column from position 5 onward.
cluster_columns = scarborough_merged.columns[[2] + list(range(5, scarborough_merged.shape[1]))]
scarborough_merged.loc[scarborough_merged['Cluster Labels'] == 1, cluster_columns]

Unnamed: 0,Neighborhood,Cluster Labels,Cluster Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,"Guildwood, Morningside, West Hill",1.0,1.0,Pizza Place,Electronics Store,Grocery Store,Restaurant,Fast Food Restaurant,Medical Center,Coffee Shop,Pharmacy,Greek Restaurant,Bank
4,Cedarbrae,1.0,1.0,Trail,Construction & Landscaping,Lounge,Gaming Cafe,ATM,Medical Supply Store,Optical Shop,Noodle House,Newsagent,Mobile Phone Shop
5,Scarborough Village,1.0,1.0,Park,Convenience Store,Grocery Store,Flower Shop,Spa,Medical Supply Store,Noodle House,Newsagent,Mobile Phone Shop,Miscellaneous Shop
6,"Kennedy Park, Ionview, East Birchmount Park",1.0,1.0,Pharmacy,Discount Store,Coffee Shop,Convenience Store,Hobby Shop,Chinese Restaurant,Light Rail Station,Hockey Arena,Auto Garage,Grocery Store
7,"Golden Mile, Clairlea, Oakridge",1.0,1.0,Intersection,Bakery,Coffee Shop,Bus Line,Metro Station,Business Service,Park,Bus Station,Soccer Field,Middle Eastern Restaurant
8,"Cliffside, Cliffcrest, Scarborough Village West",1.0,1.0,Ice Cream Shop,Gift Shop,Pharmacy,Spa,Liquor Store,Electronics Store,Discount Store,Pizza Place,Coffee Shop,Sandwich Place
9,"Birch Cliff, Cliffside West",1.0,1.0,College Stadium,Café,General Entertainment,Skating Rink,ATM,Medical Supply Store,Optical Shop,Noodle House,Newsagent,Mobile Phone Shop
10,"Dorset Park, Wexford Heights, Scarborough Town...",1.0,1.0,Wine Shop,Bakery,Construction & Landscaping,Rental Service,Brewery,Other Repair Shop,Metro Station,Optical Shop,Noodle House,Newsagent
11,"Wexford, Maryvale",1.0,1.0,Home Service,Middle Eastern Restaurant,Convenience Store,Construction & Landscaping,Miscellaneous Shop,Electronics Store,Auto Garage,Intersection,Jewelry Store,Latin American Restaurant
12,Agincourt,1.0,1.0,Shanghai Restaurant,Badminton Court,Hardware Store,Newsagent,Latin American Restaurant,Breakfast Spot,Construction & Landscaping,Skating Rink,ATM,Optical Shop


###### Cluster 3 

In [56]:
# Rows with cluster label 2, showing column 2 (Neighborhood) plus every column from position 5 onward.
cluster_columns = scarborough_merged.columns[[2] + list(range(5, scarborough_merged.shape[1]))]
scarborough_merged.loc[scarborough_merged['Cluster Labels'] == 2, cluster_columns]

Unnamed: 0,Neighborhood,Cluster Labels,Cluster Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Malvern, Rouge",2.0,2.0,Home Service,Construction & Landscaping,Other Repair Shop,Optical Shop,Noodle House,Newsagent,Mobile Phone Shop,Miscellaneous Shop,Middle Eastern Restaurant,Mexican Restaurant


###### Cluster 4

In [57]:
# Rows with cluster label 3, showing column 2 (Neighborhood) plus every column from position 5 onward.
cluster_columns = scarborough_merged.columns[[2] + list(range(5, scarborough_merged.shape[1]))]
scarborough_merged.loc[scarborough_merged['Cluster Labels'] == 3, cluster_columns]

Unnamed: 0,Neighborhood,Cluster Labels,Cluster Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
14,"Milliken, Agincourt North, Steeles East, L'Amo...",3.0,3.0,Pharmacy,Intersection,ATM,Medical Supply Store,Optical Shop,Noodle House,Newsagent,Mobile Phone Shop,Miscellaneous Shop,Middle Eastern Restaurant


###### Cluster 5

In [58]:
# Rows with cluster label 4, showing column 2 (Neighborhood) plus every column from position 5 onward.
cluster_columns = scarborough_merged.columns[[2] + list(range(5, scarborough_merged.shape[1]))]
scarborough_merged.loc[scarborough_merged['Cluster Labels'] == 4, cluster_columns]

Unnamed: 0,Neighborhood,Cluster Labels,Cluster Label,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,Woburn,4.0,4.0,Convenience Store,Insurance Office,Other Repair Shop,ATM,Medical Supply Store,Optical Shop,Noodle House,Newsagent,Mobile Phone Shop,Miscellaneous Shop
