# The Data Preprocessing

##  Cluster the neighborhoods in Toronto

In [739]:
!pip install folium



In [740]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#conda install -c conda-forge geopy #--yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


In [741]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'  #reading the wikipedia page for getting postal details
df_list = pd.read_html(url)

In [742]:
len(df_list)

3

In [743]:
df_canada_post=df_list[0] #contains the table 
print(len(df_canada_post))#length of the dataframe
df_canada_post.head()

180


Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [744]:
df_canada=df_canada_post[df_canada_post["Borough"]!="Not assigned"].reset_index(drop=True) # Removing the Not assigned rows from DataFrame
df_canada.head()# and resting the index

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [745]:
postal_df=pd.read_csv("http://cocl.us/Geospatial_data")#reading the given CSV file for getting the lat and log details
postal_df.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [746]:
column_names=[col for col in df_canada.columns]+["Latitude","Longitude"] #intializing the new headers for obtain the required dataframe
column_names

['Postal Code', 'Borough', 'Neighbourhood', 'Latitude', 'Longitude']

In [747]:
df_canada_Postal = pd.DataFrame(columns=column_names)#Create an empty dataframe with mentioned headers
df_canada_Postal

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude


In [748]:
#funtion defined for obtaining the lattitude and Longitude
def get_lat_log(postal_code):
    x=postal_df.index[postal_df["Postal Code"]== postal_code].item()
    return postal_df.loc[x,"Latitude"],postal_df.loc[x,"Longitude"]    

In [749]:
#Created required/metioned Dataframe 
for Postal_Code,Borough,Neighbourhood in zip(df_canada["Postal Code"],df_canada["Borough"],df_canada["Neighbourhood"]):
    Latitude=get_lat_log(Postal_Code)[0] #For Latitude
    Longitude=get_lat_log(Postal_Code)[1]#For Longitude
    df_canada_Postal=df_canada_Postal.append({"Postal Code":Postal_Code,
                             "Borough":Borough,
                             "Neighbourhood":Neighbourhood,
                             "Latitude":Latitude,
                             "Longitude":Longitude},ignore_index=True)
df_canada_Postal.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


In [750]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(df_canada_Postal['Borough'].unique()),
        df_canada_Postal.shape[0]
    )
)
df_canada_Postal['Borough'].unique()

The dataframe has 10 boroughs and 103 neighborhoods.


array(['North York', 'Downtown Toronto', 'Etobicoke', 'Scarborough',
       'East York', 'York', 'East Toronto', 'West Toronto',
       'Central Toronto', 'Mississauga'], dtype=object)

In [751]:

address = 'Etobicoke, CA'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Etobicoke are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Etobicoke are 43.6435559, -79.5656326.


In [752]:
# create map of Toronto using latitude and longitude values
map_newyork = folium.Map(location=[latitude, longitude], zoom_start=10)
for lat, lng, borough, neighborhood in zip(df_canada_Postal['Latitude'], df_canada_Postal['Longitude'], df_canada_Postal['Borough'], df_canada_Postal['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_newyork)  
    
map_newyork



In [753]:
Etobicoke_df=df_canada_Postal[df_canada_Postal["Borough"]=="Etobicoke"].reset_index(drop=True)
Etobicoke_df

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242
1,M9B,Etobicoke,"West Deane Park, Princess Gardens, Martin Grov...",43.650943,-79.554724
2,M9C,Etobicoke,"Eringate, Bloordale Gardens, Old Burnhamthorpe...",43.643515,-79.577201
3,M9P,Etobicoke,Westmount,43.696319,-79.532242
4,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724
5,M8V,Etobicoke,"New Toronto, Mimico South, Humber Bay Shores",43.605647,-79.501321
6,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437
7,M8W,Etobicoke,"Alderwood, Long Branch",43.602414,-79.543484
8,M9W,Etobicoke,"Northwest, West Humber - Clairville",43.706748,-79.594054
9,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944


In [754]:
address = 'City of Toronto, CA'
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of City of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of City of Toronto are 43.6534817, -79.3839347.


In [755]:
map_Etobicoke = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(Etobicoke_df['Latitude'], Etobicoke_df['Longitude'],Etobicoke_df["Neighbourhood"]):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Etobicoke)  
    
map_Etobicoke

In [None]:
# The code was removed by Watson Studio for sharing.

In [757]:
Etobicoke_df.loc[0, 'Neighbourhood']

'Islington Avenue, Humber Valley Village'

# getting the Latitude and Longitute Value

In [758]:
neighborhood_latitude = Etobicoke_df.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = Etobicoke_df.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = Etobicoke_df.loc[0, 'Neighbourhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Islington Avenue, Humber Valley Village are 43.6678556, -79.53224240000002.


In [759]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 1000 # define radius
# create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)

In [760]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5fc79a7fa5b71d40e97d34cb'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Edenbridge - Humber Valley',
  'headerFullLocation': 'Edenbridge - Humber Valley, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 12,
  'suggestedBounds': {'ne': {'lat': 43.676855609000015,
    'lng': -79.51982358836784},
   'sw': {'lat': 43.65885559099999, 'lng': -79.54466121163219}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4bfd53764cf820a13849ecf4',
       'name': "Java Joe's Village Cafe",
       'location': {'address': '1500 Islington Ave',
        'crossStreet': 'at Rathburn Rd',
        'lat': 43.662460906352436,
        'lng': -7

In [761]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [762]:
venues = results['response']['groups'][0]['items']
nearby_venues = json_normalize(venues) # flatten JSON
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]
nearby_venues

  from ipykernel import kernelapp as app


Unnamed: 0,venue.name,venue.categories,venue.location.lat,venue.location.lng
0,Java Joe's Village Cafe,"[{'id': '4bf58dd8d48988d16d941735', 'name': 'C...",43.662461,-79.532054
1,St Georges Golf and Country Club,"[{'id': '4bf58dd8d48988d1e6941735', 'name': 'G...",43.674395,-79.537142
2,COBS Bread,"[{'id': '4bf58dd8d48988d16a941735', 'name': 'B...",43.66494,-79.520485
3,TD Canada Trust,"[{'id': '4bf58dd8d48988d10a951735', 'name': 'B...",43.662545,-79.531749
4,Shoppers Drug Mart,"[{'id': '4bf58dd8d48988d10f951735', 'name': 'P...",43.663067,-79.531753
5,Thorncrest Drug Store,"[{'id': '4bf58dd8d48988d10f951735', 'name': 'P...",43.662988,-79.531817
6,Foodland - Toronto,"[{'id': '4bf58dd8d48988d118951735', 'name': 'G...",43.662724,-79.531984
7,Thorncrest Plaza,"[{'id': '4bf58dd8d48988d1fd941735', 'name': 'S...",43.66262,-79.532146
8,Princess Margaret Park,"[{'id': '4bf58dd8d48988d1e7941735', 'name': 'P...",43.667835,-79.539934
9,Humber Valley Park,"[{'id': '4bf58dd8d48988d163941735', 'name': 'P...",43.664825,-79.524999


In [763]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

  app.launch_new_instance()


Unnamed: 0,name,categories,lat,lng
0,Java Joe's Village Cafe,Café,43.662461,-79.532054
1,St Georges Golf and Country Club,Golf Course,43.674395,-79.537142
2,COBS Bread,Bakery,43.66494,-79.520485
3,TD Canada Trust,Bank,43.662545,-79.531749
4,Shoppers Drug Mart,Pharmacy,43.663067,-79.531753


In [764]:
len(nearby_venues)

12

In [765]:
def getNearbyVenues(names, latitudes, longitudes, radius=550):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [766]:
Etobicoke_venues = getNearbyVenues(names=Etobicoke_df['Neighbourhood'],
                                   latitudes=Etobicoke_df['Latitude'],
                                   longitudes=Etobicoke_df['Longitude']
                                  )


Islington Avenue, Humber Valley Village
West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale
Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood
Westmount
Kingsview Village, St. Phillips, Martin Grove Gardens, Richview Gardens
New Toronto, Mimico South, Humber Bay Shores
South Steeles, Silverstone, Humbergate, Jamestown, Mount Olive, Beaumond Heights, Thistletown, Albion Gardens
Alderwood, Long Branch
Northwest, West Humber - Clairville
The Kingsway, Montgomery Road, Old Mill North
Old Mill South, King's Mill Park, Sunnylea, Humber Bay, Mimico NE, The Queensway East, Royal York South East, Kingsway Park South East
Mimico NW, The Queensway West, South of Bloor, Kingsway Park South West, Royal York South West


In [767]:
print(Etobicoke_venues.shape)
Etobicoke_venues.head()
Etobicoke_venues[Etobicoke_venues["Neighbourhood"]=="Islington Avenue, Humber Valley Village"]

(90, 7)


Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Islington Avenue, Humber Valley Village",43.667856,-79.532242,Shoppers Drug Mart,43.663067,-79.531753,Pharmacy
1,"Islington Avenue, Humber Valley Village",43.667856,-79.532242,Thorncrest Drug Store,43.662988,-79.531817,Pharmacy


In [768]:
Etobicoke_venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Alderwood, Long Branch",7,7,7,7,7,7
"Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood",9,9,9,9,9,9
"Islington Avenue, Humber Valley Village",2,2,2,2,2,2
"Kingsview Village, St. Phillips, Martin Grove Gardens, Richview Gardens",6,6,6,6,6,6
"Mimico NW, The Queensway West, South of Bloor, Kingsway Park South West, Royal York South West",16,16,16,16,16,16
"New Toronto, Mimico South, Humber Bay Shores",17,17,17,17,17,17
"Northwest, West Humber - Clairville",4,4,4,4,4,4
"Old Mill South, King's Mill Park, Sunnylea, Humber Bay, Mimico NE, The Queensway East, Royal York South East, Kingsway Park South East",2,2,2,2,2,2
"South Steeles, Silverstone, Humbergate, Jamestown, Mount Olive, Beaumond Heights, Thistletown, Albion Gardens",10,10,10,10,10,10
"The Kingsway, Montgomery Road, Old Mill North",4,4,4,4,4,4


In [769]:
print('There are {} uniques categories.'.format(len(Etobicoke_venues['Venue Category'].unique())))

There are 45 uniques categories.


## Analyze Each Neighborhood

In [770]:
# one hot encoding
Etobicoke_onehot = pd.get_dummies(Etobicoke_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
Etobicoke_onehot['Neighbourhood'] = Etobicoke_venues['Neighbourhood'] 

# move neighborhood column to the first column
fixed_columns = [Etobicoke_onehot.columns[-1]] + list(Etobicoke_onehot.columns[:-1])
Etobicoke_onehot = Etobicoke_onehot[fixed_columns]

Etobicoke_onehot

Unnamed: 0,Neighbourhood,American Restaurant,Bakery,Bar,Baseball Field,Beer Store,Burger Joint,Burrito Place,Bus Line,Café,Chinese Restaurant,Coffee Shop,Construction & Landscaping,Convenience Store,Dessert Shop,Discount Store,Drugstore,Electronics Store,Fast Food Restaurant,Flea Market,Fried Chicken Joint,Garden Center,Grocery Store,Gym,Hardware Store,Ice Cream Shop,Intersection,Kids Store,Liquor Store,Mexican Restaurant,Pharmacy,Pizza Place,Pool,Print Shop,Pub,Rental Car Location,Restaurant,River,Sandwich Place,Shopping Plaza,Social Club,Supplement Shop,Tanning Salon,Tea Room,Thrift / Vintage Store,Wings Joint
0,"Islington Avenue, Humber Valley Village",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"Islington Avenue, Humber Valley Village",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,"West Deane Park, Princess Gardens, Martin Grov...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
3,"West Deane Park, Princess Gardens, Martin Grov...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,"West Deane Park, Princess Gardens, Martin Grov...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5,"West Deane Park, Princess Gardens, Martin Grov...",0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6,"West Deane Park, Princess Gardens, Martin Grov...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
7,"Eringate, Bloordale Gardens, Old Burnhamthorpe...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
8,"Eringate, Bloordale Gardens, Old Burnhamthorpe...",0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
9,"Eringate, Bloordale Gardens, Old Burnhamthorpe...",0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [771]:
Etobicoke_onehot.shape

(90, 46)

In [772]:
Etobicoke_onehot.shape
Etobicoke_grouped = Etobicoke_onehot.groupby('Neighbourhood').mean().reset_index()
Etobicoke_grouped

Unnamed: 0,Neighbourhood,American Restaurant,Bakery,Bar,Baseball Field,Beer Store,Burger Joint,Burrito Place,Bus Line,Café,Chinese Restaurant,Coffee Shop,Construction & Landscaping,Convenience Store,Dessert Shop,Discount Store,Drugstore,Electronics Store,Fast Food Restaurant,Flea Market,Fried Chicken Joint,Garden Center,Grocery Store,Gym,Hardware Store,Ice Cream Shop,Intersection,Kids Store,Liquor Store,Mexican Restaurant,Pharmacy,Pizza Place,Pool,Print Shop,Pub,Rental Car Location,Restaurant,River,Sandwich Place,Shopping Plaza,Social Club,Supplement Shop,Tanning Salon,Tea Room,Thrift / Vintage Store,Wings Joint
0,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.285714,0.0,0.0,0.142857,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Eringate, Bloordale Gardens, Old Burnhamthorpe...",0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.111111,0.0,0.111111,0.0,0.111111,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.111111,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0
2,"Islington Avenue, Humber Valley Village",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Kingsview Village, St. Phillips, Martin Grove ...",0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.166667,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Mimico NW, The Queensway West, South of Bloor,...",0.0,0.0625,0.0,0.0,0.0,0.0625,0.0625,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0625,0.0,0.0,0.0625,0.0,0.0,0.0,0.0625,0.0625,0.0625,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0625,0.0625,0.0625,0.0,0.0625,0.0625
5,"New Toronto, Mimico South, Humber Bay Shores",0.058824,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.117647,0.0,0.117647,0.0,0.0,0.058824,0.0,0.0,0.0,0.058824,0.0,0.058824,0.0,0.058824,0.058824,0.0,0.0,0.0,0.0,0.058824,0.117647,0.058824,0.058824,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,"Northwest, West Humber - Clairville",0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,"Old Mill South, King's Mill Park, Sunnylea, Hu...",0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"South Steeles, Silverstone, Humbergate, Jamest...",0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.1,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.1,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,"The Kingsway, Montgomery Road, Old Mill North",0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [773]:
num_top_venues = 10

for hood in Etobicoke_grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp = Etobicoke_grouped[Etobicoke_grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Alderwood, Long Branch----
                venue  freq
0         Pizza Place  0.29
1                 Gym  0.14
2                 Pub  0.14
3            Pharmacy  0.14
4      Sandwich Place  0.14
5         Coffee Shop  0.14
6        Intersection  0.00
7          Kids Store  0.00
8        Liquor Store  0.00
9  Mexican Restaurant  0.00


----Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood----
                 venue  freq
0    Convenience Store  0.11
1         Liquor Store  0.11
2           Beer Store  0.11
3             Pharmacy  0.11
4          Pizza Place  0.11
5                 Café  0.11
6       Shopping Plaza  0.11
7          Coffee Shop  0.11
8    Electronics Store  0.11
9  Rental Car Location  0.00


----Islington Avenue, Humber Valley Village----
                 venue  freq
0             Pharmacy   1.0
1  American Restaurant   0.0
2       Hardware Store   0.0
3         Intersection   0.0
4           Kids Store   0.0
5         Liquor Store   0.0
6   Mexican Resta

In [774]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False) 
    return row_categories_sorted.index.values[0:num_top_venues]
num_top_venues = 10
indicators = ['st', 'nd', 'rd']
# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))
# create a new dataframe
neighbourhoods_venues_sorted = pd.DataFrame(columns=columns)
neighbourhoods_venues_sorted['Neighbourhood'] = Etobicoke_grouped['Neighbourhood']
for ind in np.arange(Etobicoke_grouped.shape[0]):
    neighbourhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Etobicoke_grouped.iloc[ind, :], num_top_venues)

neighbourhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Alderwood, Long Branch",Pizza Place,Gym,Coffee Shop,Sandwich Place,Pharmacy,Pub,Fast Food Restaurant,Electronics Store,Drugstore,Discount Store
1,"Eringate, Bloordale Gardens, Old Burnhamthorpe...",Coffee Shop,Pizza Place,Electronics Store,Beer Store,Shopping Plaza,Liquor Store,Café,Convenience Store,Pharmacy,Fast Food Restaurant
2,"Islington Avenue, Humber Valley Village",Pharmacy,Wings Joint,Construction & Landscaping,Fried Chicken Joint,Flea Market,Fast Food Restaurant,Electronics Store,Drugstore,Discount Store,Dessert Shop
3,"Kingsview Village, St. Phillips, Martin Grove ...",Pizza Place,Beer Store,Sandwich Place,Bus Line,Chinese Restaurant,Pharmacy,Wings Joint,Flea Market,Fast Food Restaurant,Electronics Store
4,"Mimico NW, The Queensway West, South of Bloor,...",Wings Joint,Kids Store,Bakery,Burger Joint,Burrito Place,Convenience Store,Discount Store,Fast Food Restaurant,Grocery Store,Thrift / Vintage Store


In [775]:
Etobicoke_grouped['Neighbourhood']

0                                Alderwood, Long Branch
1     Eringate, Bloordale Gardens, Old Burnhamthorpe...
2               Islington Avenue, Humber Valley Village
3     Kingsview Village, St. Phillips, Martin Grove ...
4     Mimico NW, The Queensway West, South of Bloor,...
5          New Toronto, Mimico South, Humber Bay Shores
6                   Northwest, West Humber - Clairville
7     Old Mill South, King's Mill Park, Sunnylea, Hu...
8     South Steeles, Silverstone, Humbergate, Jamest...
9         The Kingsway, Montgomery Road, Old Mill North
10    West Deane Park, Princess Gardens, Martin Grov...
11                                            Westmount
Name: Neighbourhood, dtype: object

## Cluster the Neighbour

In [776]:
# set number of clusters
kclusters = 5

Etobicoke_grouped_clustering = Etobicoke_grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Etobicoke_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 
# add clustering labels


array([0, 0, 2, 0, 0, 0, 3, 1, 0, 4], dtype=int32)

In [777]:
Etobicoke_grouped_clustering

Unnamed: 0,American Restaurant,Bakery,Bar,Baseball Field,Beer Store,Burger Joint,Burrito Place,Bus Line,Café,Chinese Restaurant,Coffee Shop,Construction & Landscaping,Convenience Store,Dessert Shop,Discount Store,Drugstore,Electronics Store,Fast Food Restaurant,Flea Market,Fried Chicken Joint,Garden Center,Grocery Store,Gym,Hardware Store,Ice Cream Shop,Intersection,Kids Store,Liquor Store,Mexican Restaurant,Pharmacy,Pizza Place,Pool,Print Shop,Pub,Rental Car Location,Restaurant,River,Sandwich Place,Shopping Plaza,Social Club,Supplement Shop,Tanning Salon,Tea Room,Thrift / Vintage Store,Wings Joint
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.285714,0.0,0.0,0.142857,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.111111,0.0,0.111111,0.0,0.111111,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.111111,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.166667,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0625,0.0,0.0,0.0,0.0625,0.0625,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0625,0.0,0.0,0.0625,0.0,0.0,0.0,0.0625,0.0625,0.0625,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0625,0.0625,0.0625,0.0,0.0625,0.0625
5,0.058824,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.117647,0.0,0.117647,0.0,0.0,0.058824,0.0,0.0,0.0,0.058824,0.0,0.058824,0.0,0.058824,0.058824,0.0,0.0,0.0,0.0,0.058824,0.117647,0.058824,0.058824,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.1,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.1,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [778]:
# add clustering labels
neighbourhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)


In [779]:
# add clustering labels
#neighbourhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

Etobicoke_merged = Etobicoke_df

# merge manhattan_grouped with manhattan_data to add latitude/longitude for each neighborhood
Etobicoke_merged = Etobicoke_merged.join(neighbourhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

Etobicoke_merged # check the last columns!

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242,2,Pharmacy,Wings Joint,Construction & Landscaping,Fried Chicken Joint,Flea Market,Fast Food Restaurant,Electronics Store,Drugstore,Discount Store,Dessert Shop
1,M9B,Etobicoke,"West Deane Park, Princess Gardens, Martin Grov...",43.650943,-79.554724,0,Pizza Place,Print Shop,Tea Room,Chinese Restaurant,Coffee Shop,Flea Market,Fast Food Restaurant,Electronics Store,Drugstore,Discount Store
2,M9C,Etobicoke,"Eringate, Bloordale Gardens, Old Burnhamthorpe...",43.643515,-79.577201,0,Coffee Shop,Pizza Place,Electronics Store,Beer Store,Shopping Plaza,Liquor Store,Café,Convenience Store,Pharmacy,Fast Food Restaurant
3,M9P,Etobicoke,Westmount,43.696319,-79.532242,0,Coffee Shop,Sandwich Place,Ice Cream Shop,Intersection,Discount Store,Pizza Place,Chinese Restaurant,Flea Market,Burrito Place,Bus Line
4,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724,0,Pizza Place,Beer Store,Sandwich Place,Bus Line,Chinese Restaurant,Pharmacy,Wings Joint,Flea Market,Fast Food Restaurant,Electronics Store
5,M8V,Etobicoke,"New Toronto, Mimico South, Humber Bay Shores",43.605647,-79.501321,0,Café,Coffee Shop,Mexican Restaurant,Gym,Pizza Place,Bakery,Dessert Shop,Fast Food Restaurant,Fried Chicken Joint,Grocery Store
6,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437,0,Pizza Place,Grocery Store,Fast Food Restaurant,Beer Store,Sandwich Place,Liquor Store,Fried Chicken Joint,Pharmacy,Wings Joint,Electronics Store
7,M8W,Etobicoke,"Alderwood, Long Branch",43.602414,-79.543484,0,Pizza Place,Gym,Coffee Shop,Sandwich Place,Pharmacy,Pub,Fast Food Restaurant,Electronics Store,Drugstore,Discount Store
8,M9W,Etobicoke,"Northwest, West Humber - Clairville",43.706748,-79.594054,3,Garden Center,Bar,Drugstore,Rental Car Location,Construction & Landscaping,Fried Chicken Joint,Flea Market,Fast Food Restaurant,Electronics Store,Discount Store
9,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944,4,Bakery,Pool,River,Wings Joint,Flea Market,Fast Food Restaurant,Electronics Store,Drugstore,Discount Store,Dessert Shop


In [780]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]
rainbow

['#8000ff', '#00b5eb', '#80ffb4', '#ffb360', '#ff0000']

In [781]:
# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Etobicoke_merged['Latitude'], Etobicoke_merged['Longitude'], Etobicoke_merged['Neighbourhood'], Etobicoke_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [782]:
Etobicoke_merged.loc[Etobicoke_merged['Cluster Labels'] == 0, Etobicoke_merged.columns[[1] + list(range(5, Etobicoke_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Etobicoke,0,Pizza Place,Print Shop,Tea Room,Chinese Restaurant,Coffee Shop,Flea Market,Fast Food Restaurant,Electronics Store,Drugstore,Discount Store
2,Etobicoke,0,Coffee Shop,Pizza Place,Electronics Store,Beer Store,Shopping Plaza,Liquor Store,Café,Convenience Store,Pharmacy,Fast Food Restaurant
3,Etobicoke,0,Coffee Shop,Sandwich Place,Ice Cream Shop,Intersection,Discount Store,Pizza Place,Chinese Restaurant,Flea Market,Burrito Place,Bus Line
4,Etobicoke,0,Pizza Place,Beer Store,Sandwich Place,Bus Line,Chinese Restaurant,Pharmacy,Wings Joint,Flea Market,Fast Food Restaurant,Electronics Store
5,Etobicoke,0,Café,Coffee Shop,Mexican Restaurant,Gym,Pizza Place,Bakery,Dessert Shop,Fast Food Restaurant,Fried Chicken Joint,Grocery Store
6,Etobicoke,0,Pizza Place,Grocery Store,Fast Food Restaurant,Beer Store,Sandwich Place,Liquor Store,Fried Chicken Joint,Pharmacy,Wings Joint,Electronics Store
7,Etobicoke,0,Pizza Place,Gym,Coffee Shop,Sandwich Place,Pharmacy,Pub,Fast Food Restaurant,Electronics Store,Drugstore,Discount Store
11,Etobicoke,0,Wings Joint,Kids Store,Bakery,Burger Joint,Burrito Place,Convenience Store,Discount Store,Fast Food Restaurant,Grocery Store,Thrift / Vintage Store


In [783]:
Etobicoke_merged.loc[Etobicoke_merged['Cluster Labels'] == 1, Etobicoke_merged.columns[[1] + list(range(5, Etobicoke_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
10,Etobicoke,1,Construction & Landscaping,Baseball Field,Wings Joint,Fried Chicken Joint,Flea Market,Fast Food Restaurant,Electronics Store,Drugstore,Discount Store,Dessert Shop


In [784]:
Etobicoke_merged.loc[Etobicoke_merged['Cluster Labels'] == 2, Etobicoke_merged.columns[[1] + list(range(5, Etobicoke_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Etobicoke,2,Pharmacy,Wings Joint,Construction & Landscaping,Fried Chicken Joint,Flea Market,Fast Food Restaurant,Electronics Store,Drugstore,Discount Store,Dessert Shop


In [785]:
Etobicoke_merged.loc[Etobicoke_merged['Cluster Labels'] == 3, Etobicoke_merged.columns[[1] + list(range(5, Etobicoke_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,Etobicoke,3,Garden Center,Bar,Drugstore,Rental Car Location,Construction & Landscaping,Fried Chicken Joint,Flea Market,Fast Food Restaurant,Electronics Store,Discount Store


In [786]:
Etobicoke_merged.loc[Etobicoke_merged['Cluster Labels'] == 4, Etobicoke_merged.columns[[1] + list(range(5, Etobicoke_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
9,Etobicoke,4,Bakery,Pool,River,Wings Joint,Flea Market,Fast Food Restaurant,Electronics Store,Drugstore,Discount Store,Dessert Shop
