In [4]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


## Notes:
- As I noted in the second question, I saved the result of Toronto Latitude and Longitude to the file "Toronto_Latitude_Longitude.csv", and now I read this to data frame

In [7]:
neighborhoods = pd.read_csv("Toronto_Latitude_Longitude.csv", index_col=0)

In [8]:
neighborhoods.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.686504,-79.279742
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.7871,-79.157666
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763592,-79.162531
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.770175,-79.239475


In [9]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(neighborhoods['Borough'].unique()),
        neighborhoods.shape[0]
    )
)

The dataframe has 10 boroughs and 102 neighborhoods.


# Use geopy library to get the latitude and longitude values of Toronto, Ontario.

In [10]:
address = 'Toronto, Ontario'

geolocator = Nominatim(user_agent="ca_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


# Create a map of Toronto with neighborhoods superimposed on top

In [15]:
# create map of New York using latitude and longitude values
map_Toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Borough'], neighborhoods['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Toronto)  
    
map_Toronto

# I choose Borough = "Scarborough" to analyse
- For illustration purposes, let's simplify the above map and segment and cluster only the neighborhoods in Scarboroug. So let's slice the original dataframe and create a new dataframe of the Scarboroug data.

In [13]:
Scarborough_data = neighborhoods[neighborhoods['Borough'] == 'Scarborough'].reset_index(drop=True)
Scarborough_data

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.686504,-79.279742
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.7871,-79.157666
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763592,-79.162531
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.770175,-79.239475
5,M1J,Scarborough,Scarborough Village,43.740651,-79.239109
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.713967,-79.281622
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.724713,-79.228194
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


#### Let's get the geographical coordinates of Scarborough

In [14]:
address = 'Scarborough, , Ontario'

geolocator = Nominatim(user_agent="ca_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Scarborough are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Scarborough are 43.773077, -79.257774.


#### As we did with all of Toronto, let's visualizat Scarborough the neighborhoods in it.

In [16]:
# create map of Manhattan using latitude and longitude values
map_Scarborough = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(Scarborough_data['Latitude'], Scarborough_data['Longitude'], Scarborough_data['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Scarborough)  
    
map_Scarborough

#### Next, we are going to start utilizing the Foursquare API to explore the neighborhoods and segment them.
### Define Foursquare Credentials and Version

In [17]:
CLIENT_ID = 'Q1FRAXZULEO3ESVPW2R0E1OJ0G30MF0D3X1JBPKOJB2KV0J5' # your Foursquare ID
CLIENT_SECRET = 'GXWRUQGK3FWR0E3D55TPHKMOYRKMDKC1OTZV10FJALW2M3QH' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: Q1FRAXZULEO3ESVPW2R0E1OJ0G30MF0D3X1JBPKOJB2KV0J5
CLIENT_SECRET:GXWRUQGK3FWR0E3D55TPHKMOYRKMDKC1OTZV10FJALW2M3QH


#### Let's explore the first neighborhood in our dataframe.

- Get the neighborhood's name.

In [19]:
Scarborough_data.loc[0, 'Neighbourhood']

'Rouge, Malvern'

- Get the neighborhood's latitude and longitude values.

In [21]:
neighborhood_latitude = Scarborough_data.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = Scarborough_data.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = Scarborough_data.loc[0, 'Neighbourhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Rouge, Malvern are 43.686504, -79.279742.


#### Now, let's get the top 100 venues that are in 'Rouge, Malvern' within a radius of 500 meters.

- First, let's create the GET request URL. Name your URL url.

In [22]:
# type your answer here
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=Q1FRAXZULEO3ESVPW2R0E1OJ0G30MF0D3X1JBPKOJB2KV0J5&client_secret=GXWRUQGK3FWR0E3D55TPHKMOYRKMDKC1OTZV10FJALW2M3QH&v=20180605&ll=43.686504,-79.279742&radius=500&limit=100'

- Send the GET request and examine the resutls

In [23]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5c779d759fb6b7415b0f7961'},
 'response': {'headerLocation': 'Birch Cliff',
  'headerFullLocation': 'Birch Cliff, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 5,
  'suggestedBounds': {'ne': {'lat': 43.6910040045, 'lng': -79.27353066410198},
   'sw': {'lat': 43.6820039955, 'lng': -79.28595333589801}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4d70e964b48c236a151bd557',
       'name': 'FreshCo',
       'location': {'address': '2490 Gerrard St E',
        'crossStreet': 'Victoria Park',
        'lat': 43.687779840325845,
        'lng': -79.28548265751691,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.687779840325845,
          'lng': -79.28548265751691}],
        'distance': 483,
 

#### From the Foursquare lab in the previous module, we know that all the information is in the items key. Before we proceed, let's borrow the get_category_type function from the Foursquare lab.

In [25]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

#### Now we are ready to clean the json and structure it into a pandas dataframe.

In [26]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,FreshCo,Grocery Store,43.68778,-79.285483
1,Blantyre Park,Playground,43.682842,-79.280639
2,Play It Again Sports,Sporting Goods Shop,43.687545,-79.284906
3,Rogers,Video Store,43.687451,-79.285241
4,The Quarry Cafe,Diner,43.689141,-79.278008


#### And how many venues were returned by Foursquare?

In [28]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

5 venues were returned by Foursquare.


## Explore Neighborhoods in Scarborough

#### Let's create a function to repeat the same process to all the neighborhoods in Scarborough

In [29]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

#### Now write the code to run the above function on each neighborhood and create a new dataframe called Scarborough_venues.

In [30]:
Scarborough_venues = getNearbyVenues(names=Scarborough_data['Neighbourhood'],
                                   latitudes=Scarborough_data['Latitude'],
                                   longitudes=Scarborough_data['Longitude']
                                  )

Rouge, Malvern
Highland Creek, Rouge Hill, Port Union
Guildwood, Morningside, West Hill
Woburn
Cedarbrae
Scarborough Village
East Birchmount Park, Ionview, Kennedy Park
Clairlea, Golden Mile, Oakridge
Cliffcrest, Cliffside, Scarborough Village West
Birch Cliff, Cliffside West
Dorset Park, Scarborough Town Centre, Wexford Heights
Maryvale, Wexford
Agincourt
Clarks Corners, Sullivan, Tam O'Shanter
Agincourt North, L'Amoreaux East, Milliken, Steeles East
L'Amoreaux West, Steeles West
Upper Rouge


#### Let's check the size of the resulting dataframe

In [31]:
print(Scarborough_venues.shape)
Scarborough_venues.head()

(115, 7)


Unnamed: 0,Neighbourhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Rouge, Malvern",43.686504,-79.279742,FreshCo,43.68778,-79.285483,Grocery Store
1,"Rouge, Malvern",43.686504,-79.279742,Blantyre Park,43.682842,-79.280639,Playground
2,"Rouge, Malvern",43.686504,-79.279742,Play It Again Sports,43.687545,-79.284906,Sporting Goods Shop
3,"Rouge, Malvern",43.686504,-79.279742,Rogers,43.687451,-79.285241,Video Store
4,"Rouge, Malvern",43.686504,-79.279742,The Quarry Cafe,43.689141,-79.278008,Diner


#### Let's check how many venues were returned for each neighborhood

In [33]:
Scarborough_venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,3,3,3,3,3,3
"Agincourt North, L'Amoreaux East, Milliken, Steeles East",7,7,7,7,7,7
"Birch Cliff, Cliffside West",4,4,4,4,4,4
Cedarbrae,2,2,2,2,2,2
"Clairlea, Golden Mile, Oakridge",10,10,10,10,10,10
"Clarks Corners, Sullivan, Tam O'Shanter",4,4,4,4,4,4
"Cliffcrest, Cliffside, Scarborough Village West",7,7,7,7,7,7
"Dorset Park, Scarborough Town Centre, Wexford Heights",16,16,16,16,16,16
"East Birchmount Park, Ionview, Kennedy Park",7,7,7,7,7,7
"Guildwood, Morningside, West Hill",4,4,4,4,4,4


#### Let's find out how many unique categories can be curated from all the returned venues

In [34]:
print('There are {} uniques categories.'.format(len(Scarborough_venues['Venue Category'].unique())))

There are 60 uniques categories.


# Analyze Each Neighborhood

In [35]:
# one hot encoding
Scarborough_onehot = pd.get_dummies(Scarborough_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
Scarborough_onehot['Neighbourhood'] = Scarborough_venues['Neighbourhood'] 

# move neighborhood column to the first column
fixed_columns = [Scarborough_onehot.columns[-1]] + list(Scarborough_onehot.columns[:-1])
Scarborough_onehot = Scarborough_onehot[fixed_columns]

Scarborough_onehot.head()

Unnamed: 0,Neighbourhood,Afghan Restaurant,Athletics & Sports,Auto Workshop,Bakery,Bank,Bar,Bistro,Breakfast Spot,Burger Joint,Bus Line,Bus Station,Business Service,Café,Caribbean Restaurant,Chinese Restaurant,Chocolate Shop,Coffee Shop,College Stadium,Convenience Store,Department Store,Diner,Discount Store,Electronics Store,Fast Food Restaurant,Fish Market,Flower Shop,Fried Chicken Joint,General Entertainment,Grocery Store,Gym,Hobby Shop,Hookah Bar,Indian Restaurant,Intersection,Japanese Restaurant,Korean Restaurant,Light Rail Station,Liquor Store,Metro Station,Middle Eastern Restaurant,Moving Target,Other Great Outdoors,Park,Pharmacy,Pizza Place,Playground,Rental Car Location,Restaurant,Sandwich Place,Seafood Restaurant,Shopping Mall,Skating Rink,Smoke Shop,Soccer Field,Sporting Goods Shop,Sports Bar,Thai Restaurant,Train Station,Video Store,Vietnamese Restaurant
0,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
3,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
4,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


#### And let's examine the new dataframe size.

In [36]:
Scarborough_onehot.shape

(115, 61)

#### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [38]:
Scarborough_grouped = Scarborough_onehot.groupby('Neighbourhood').mean().reset_index()
Scarborough_grouped

Unnamed: 0,Neighbourhood,Afghan Restaurant,Athletics & Sports,Auto Workshop,Bakery,Bank,Bar,Bistro,Breakfast Spot,Burger Joint,Bus Line,Bus Station,Business Service,Café,Caribbean Restaurant,Chinese Restaurant,Chocolate Shop,Coffee Shop,College Stadium,Convenience Store,Department Store,Diner,Discount Store,Electronics Store,Fast Food Restaurant,Fish Market,Flower Shop,Fried Chicken Joint,General Entertainment,Grocery Store,Gym,Hobby Shop,Hookah Bar,Indian Restaurant,Intersection,Japanese Restaurant,Korean Restaurant,Light Rail Station,Liquor Store,Metro Station,Middle Eastern Restaurant,Moving Target,Other Great Outdoors,Park,Pharmacy,Pizza Place,Playground,Rental Car Location,Restaurant,Sandwich Place,Seafood Restaurant,Shopping Mall,Skating Rink,Smoke Shop,Soccer Field,Sporting Goods Shop,Sports Bar,Thai Restaurant,Train Station,Video Store,Vietnamese Restaurant
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.666667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Agincourt North, L'Amoreaux East, Milliken, St...",0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.142857
2,"Birch Cliff, Cliffside West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Cedarbrae,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0
4,"Clairlea, Golden Mile, Oakridge",0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0
5,"Clarks Corners, Sullivan, Tam O'Shanter",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,"Cliffcrest, Cliffside, Scarborough Village West",0.0,0.0,0.0,0.0,0.142857,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,"Dorset Park, Scarborough Town Centre, Wexford ...",0.0,0.0,0.0625,0.0625,0.0625,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0625,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0625,0.0,0.125,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0625,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"East Birchmount Park, Ionview, Kennedy Park",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.142857,0.142857,0.0,0.285714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0
9,"Guildwood, Morningside, West Hill",0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### Let's confirm the new size

In [39]:
Scarborough_grouped.shape

(17, 61)

#### Let's print each neighborhood along with the top 3 most common venues

In [41]:
num_top_venues = 3

for hood in Scarborough_grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp = Scarborough_grouped[Scarborough_grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Agincourt----
               venue  freq
0               Park  0.67
1     Breakfast Spot  0.33
2  Afghan Restaurant  0.00


----Agincourt North, L'Amoreaux East, Milliken, Steeles East----
               venue  freq
0  Afghan Restaurant  0.14
1     Chocolate Shop  0.14
2      Train Station  0.14


----Birch Cliff, Cliffside West----
                   venue  freq
0        College Stadium  0.25
1  General Entertainment  0.25
2           Skating Rink  0.25


----Cedarbrae----
             venue  freq
0       Playground   0.5
1  Thai Restaurant   0.5
2      Pizza Place   0.0


----Clairlea, Golden Mile, Oakridge----
         venue  freq
0  Coffee Shop   0.2
1       Bakery   0.2
2     Bus Line   0.2


----Clarks Corners, Sullivan, Tam O'Shanter----
           venue  freq
0    Coffee Shop  0.25
1   Intersection  0.25
2  Shopping Mall  0.25


----Cliffcrest, Cliffside, Scarborough Village West----
            venue  freq
0     Coffee Shop  0.14
1        Pharmacy  0.14
2  Sandwich Place  

#### Let's put that into a pandas dataframe

- First, let's write a function to sort the venues in descending order.

In [42]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

#### Now let's create the new dataframe and display the top 10 venues for each neighborhood.

In [44]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighbourhood'] = Scarborough_grouped['Neighbourhood']

for ind in np.arange(Scarborough_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Scarborough_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Park,Breakfast Spot,Vietnamese Restaurant,Chocolate Shop,Fried Chicken Joint,Flower Shop,Fish Market,Fast Food Restaurant,Electronics Store,Discount Store
1,"Agincourt North, L'Amoreaux East, Milliken, St...",Vietnamese Restaurant,Sandwich Place,Chocolate Shop,Fast Food Restaurant,Gym,Afghan Restaurant,Train Station,Caribbean Restaurant,Fish Market,Thai Restaurant
2,"Birch Cliff, Cliffside West",General Entertainment,Skating Rink,College Stadium,Café,Chinese Restaurant,Fried Chicken Joint,Flower Shop,Fish Market,Fast Food Restaurant,Electronics Store
3,Cedarbrae,Thai Restaurant,Playground,Vietnamese Restaurant,Chinese Restaurant,Fried Chicken Joint,Flower Shop,Fish Market,Fast Food Restaurant,Electronics Store,Discount Store
4,"Clairlea, Golden Mile, Oakridge",Bus Line,Bakery,Coffee Shop,Metro Station,Soccer Field,Intersection,Bus Station,Vietnamese Restaurant,Convenience Store,Department Store


In [45]:
neighborhoods_venues_sorted.shape

(17, 11)

# Cluster Neighborhoods

#### Run k-means to cluster the neighborhood into 3 clusters.

In [47]:
# set number of clusters
kclusters = 3

Scarborough_grouped_clustering = Scarborough_grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Scarborough_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_

array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0], dtype=int32)

#### Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

In [50]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

Scarborough_merged = Scarborough_data

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
Scarborough_merged = Scarborough_merged.join(neighborhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

Scarborough_merged.head() # check the last columns!

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Rouge, Malvern",43.686504,-79.279742,0,Grocery Store,Diner,Video Store,Playground,Sporting Goods Shop,Coffee Shop,College Stadium,Convenience Store,Department Store,Electronics Store
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.7871,-79.157666,2,Moving Target,Vietnamese Restaurant,Grocery Store,Fried Chicken Joint,Flower Shop,Fish Market,Fast Food Restaurant,Electronics Store,Discount Store,Diner
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763592,-79.162531,0,Gym,Athletics & Sports,Business Service,Park,Chocolate Shop,Fried Chicken Joint,Flower Shop,Fish Market,Fast Food Restaurant,Electronics Store
3,M1G,Scarborough,Woburn,43.770992,-79.216917,0,Coffee Shop,Soccer Field,Korean Restaurant,Vietnamese Restaurant,Chinese Restaurant,Fried Chicken Joint,Flower Shop,Fish Market,Fast Food Restaurant,Electronics Store
4,M1H,Scarborough,Cedarbrae,43.770175,-79.239475,0,Thai Restaurant,Playground,Vietnamese Restaurant,Chinese Restaurant,Fried Chicken Joint,Flower Shop,Fish Market,Fast Food Restaurant,Electronics Store,Discount Store


#### Finally, let's visualize the resulting clusters

In [52]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Scarborough_merged['Latitude'], Scarborough_merged['Longitude'], Scarborough_merged['Neighbourhood'], Scarborough_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# Examine Clusters

#### Cluster 1

In [53]:
Scarborough_merged.loc[Scarborough_merged['Cluster Labels'] == 0, Scarborough_merged.columns[[1] + list(range(5, Scarborough_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Scarborough,0,Grocery Store,Diner,Video Store,Playground,Sporting Goods Shop,Coffee Shop,College Stadium,Convenience Store,Department Store,Electronics Store
2,Scarborough,0,Gym,Athletics & Sports,Business Service,Park,Chocolate Shop,Fried Chicken Joint,Flower Shop,Fish Market,Fast Food Restaurant,Electronics Store
3,Scarborough,0,Coffee Shop,Soccer Field,Korean Restaurant,Vietnamese Restaurant,Chinese Restaurant,Fried Chicken Joint,Flower Shop,Fish Market,Fast Food Restaurant,Electronics Store
4,Scarborough,0,Thai Restaurant,Playground,Vietnamese Restaurant,Chinese Restaurant,Fried Chicken Joint,Flower Shop,Fish Market,Fast Food Restaurant,Electronics Store,Discount Store
5,Scarborough,0,Fast Food Restaurant,Convenience Store,Sandwich Place,Coffee Shop,Vietnamese Restaurant,Chinese Restaurant,Fried Chicken Joint,Flower Shop,Fish Market,Electronics Store
6,Scarborough,0,Discount Store,Train Station,Hobby Shop,Department Store,Convenience Store,Coffee Shop,Vietnamese Restaurant,Chinese Restaurant,Fried Chicken Joint,Flower Shop
7,Scarborough,0,Bus Line,Bakery,Coffee Shop,Metro Station,Soccer Field,Intersection,Bus Station,Vietnamese Restaurant,Convenience Store,Department Store
8,Scarborough,0,Pharmacy,Flower Shop,Coffee Shop,Bank,Bistro,Liquor Store,Sandwich Place,Department Store,College Stadium,Convenience Store
9,Scarborough,0,General Entertainment,Skating Rink,College Stadium,Café,Chinese Restaurant,Fried Chicken Joint,Flower Shop,Fish Market,Fast Food Restaurant,Electronics Store
10,Scarborough,0,Grocery Store,Fast Food Restaurant,Caribbean Restaurant,Pizza Place,Auto Workshop,Bakery,Bank,Light Rail Station,Chinese Restaurant,Indian Restaurant


#### Cluster 2

In [55]:
Scarborough_merged.loc[Scarborough_merged['Cluster Labels'] == 1, Scarborough_merged.columns[[1] + list(range(5, Scarborough_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
12,Scarborough,1,Park,Breakfast Spot,Vietnamese Restaurant,Chocolate Shop,Fried Chicken Joint,Flower Shop,Fish Market,Fast Food Restaurant,Electronics Store,Discount Store


#### Cluster 3

In [56]:
Scarborough_merged.loc[Scarborough_merged['Cluster Labels'] == 2, Scarborough_merged.columns[[1] + list(range(5, Scarborough_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Scarborough,2,Moving Target,Vietnamese Restaurant,Grocery Store,Fried Chicken Joint,Flower Shop,Fish Market,Fast Food Restaurant,Electronics Store,Discount Store,Diner
