# Importing Modules

In [61]:
import numpy as np
import pandas as pd
import json
import requests
from bs4 import BeautifulSoup
import os
import folium
import geocoder
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# Preparation of dataframe

In [62]:
# Download the Wikipedia page listing the Toronto ("M") postal codes.
wikipedia_link='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
wikipedia_page=requests.get(wikipedia_link, timeout=30)
wikipedia_page.raise_for_status()  # fail loudly instead of parsing an error page

# define the dataframe columns
column_names = ['PostalCode','Borough', 'Neighborhood']

bs = BeautifulSoup(wikipedia_page.text, 'html.parser')

# Only the first MAX_TABLE_CELLS <td> cells of the page are read; they are
# consumed three at a time as (postal code, borough, neighbourhood).
# NOTE(review): 867 is tied to the page layout at the time of writing - confirm
# it still matches the table before relying on the result.
MAX_TABLE_CELLS = 867
CELLS_PER_ROW = 3

table_cells = bs.select("td")[:MAX_TABLE_CELLS]
complete_rows = len(table_cells) // CELLS_PER_ROW  # an incomplete trailing row is ignored

# Collect the rows in a plain list and build the dataframe once:
# DataFrame.append was removed in pandas 2.0 and was quadratic anyway.
records = []
for row_number in range(complete_rows):
    first_cell = row_number * CELLS_PER_ROW
    postal_code, borough, neighborhood = (
        cell.getText() for cell in table_cells[first_cell:first_cell + CELLS_PER_ROW])

    # The last cell of a row carries a trailing newline; strip it without
    # cutting a character off when the newline is absent.
    neighborhood = neighborhood.rstrip('\n')

    # Ignore cells with a borough that is Not assigned.
    if borough == 'Not assigned':
        continue

    # If neighborhood is not assigned, the neighborhood will be the same as the borough.
    if neighborhood == 'Not assigned':
        neighborhood = borough

    records.append({'PostalCode': postal_code,
                    'Borough': borough,
                    'Neighborhood': neighborhood})

df = pd.DataFrame(records, columns=column_names)
    
    

############
# instantiate the dataframe
# Collapse `df` to one row per postal code.
# The row order of the result follows value_counts(), as before.
PostalCodeList=df['PostalCode'].value_counts().index

num_df=df.shape[0]
num_PostalCode=len(PostalCodeList)

# For every postal code keep the borough of its last row and join all of its
# neighbourhoods with ', ', latest row first (the same order the previous
# row-by-row concatenation produced).
per_postal_code = df.groupby('PostalCode', sort=False).agg({
    'Borough': 'last',
    'Neighborhood': lambda names: ', '.join(names.iloc[::-1]),
})

df_new = (per_postal_code
          .loc[PostalCodeList]
          .rename_axis('PostalCode')
          .reset_index()[column_names])

# Adding new columns (string placeholder '0' until coordinates are filled in)
df_new['Latitude']='0'
df_new['Longitude']='0'

##############

# Read the postal-code coordinates from Geospatial_Coordinates.csv in the
# current working directory. The code below reads its 'Postal Code',
# 'Latitude' and 'Longitude' columns.
CurrentDirectory=os.getcwd()
CSVdata_path=os.path.join(CurrentDirectory, "Geospatial_Coordinates.csv")
csv_input = pd.read_csv(filepath_or_buffer=CSVdata_path)

# Index the coordinates by postal code; if a code is listed more than once the
# last occurrence wins.
coordinates_by_code = (csv_input
                       .drop_duplicates(subset='Postal Code', keep='last')
                       .set_index('Postal Code'))

# Fill in the coordinates for every postal code present in the CSV, whatever
# its position in the file. Codes missing from the CSV keep their existing
# placeholder value.
has_coordinates = df_new['PostalCode'].isin(coordinates_by_code.index)
for coordinate in ('Latitude', 'Longitude'):
    df_new.loc[has_coordinates, coordinate] = (
        df_new.loc[has_coordinates, 'PostalCode'].map(coordinates_by_code[coordinate]))

df_new.head(15)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M9V,Etobicoke,"Thistletown, South Steeles, Silverstone, Mount...",43.7394,-79.5884
1,M8Y,Etobicoke,"Sunnylea, Royal York South East, The Queensway...",43.6363,-79.4985
2,M5V,Downtown Toronto,"South Niagara, Railway Lands, King and Spadina...",43.6289,-79.3944
3,M9B,Etobicoke,"West Deane Park, Princess Gardens, Martin Grov...",43.6509,-79.5547
4,M8Z,Etobicoke,"South of Bloor, Royal York South West, The Que...",43.6288,-79.521
5,M4V,Central Toronto,"Summerhill West, South Hill, Rathnelly, Forest...",43.6864,-79.4
6,M9R,Etobicoke,"St. Phillips, Richview Gardens, Martin Grove G...",43.6889,-79.5547
7,M9C,Etobicoke,"Old Burnhamthorpe, Markland Wood, Eringate, Bl...",43.6435,-79.5772
8,M1V,Scarborough,"Steeles East, Milliken, L'Amoreaux East, Aginc...",43.8153,-79.2846
9,M6M,York,"Silverthorn, Mount Dennis, Keelesdale, Del Ray",43.6911,-79.476


In [63]:
# Report the number of distinct boroughs and the number of rows in df_new.
borough_total = len(df_new['Borough'].unique())
row_total = df_new.shape[0]
print('The dataframe has {} boroughs and {} neighborhoods.'.format(borough_total, row_total))

The dataframe has 11 boroughs and 103 neighborhoods.


# Creating a map of Toronto with neighborhoods superimposed on top.

In [65]:
#Coordinate of Toronto
# Coordinates used as the centre of every map in this notebook (Toronto)
latitude_x=43.653963
longitude_y=-79.387207

# Base map centred on Toronto
map_toronto = folium.Map(location=[latitude_x, longitude_y], zoom_start=10)

# One circle marker per row of df_new; the popup reads "<neighborhood>, <borough>"
marker_rows = zip(df_new['Latitude'], df_new['Longitude'], df_new['Borough'], df_new['Neighborhood'])
for lat, lng, bor, nei in marker_rows:
    popup_text = '{}, {}'.format(nei, bor)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup(popup_text, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7)
    marker.add_to(map_toronto)

map_toronto

# Analysis for Central Toronto

## Map

The rows for Central Toronto are selected:

In [78]:
Place = 'Central Toronto' # You can choose...

# Keep only the rows of the chosen borough and renumber them from 0.
in_place = df_new['Borough'] == Place
partial_data = df_new.loc[in_place].reset_index(drop=True)
partial_data.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M4V,Central Toronto,"Summerhill West, South Hill, Rathnelly, Forest...",43.6864,-79.4
1,M5R,Central Toronto,"Yorkville, North Midtown, The Annex",43.6727,-79.4057
2,M5P,Central Toronto,"Forest Hill West, Forest Hill North",43.6969,-79.4113
3,M4T,Central Toronto,"Summerhill East, Moore Park",43.6896,-79.3832
4,M5N,Central Toronto,Roselawn,43.7117,-79.4169


The map for Central Toronto will be shown:

In [109]:
# create map of Central Toronto using latitude and longitude values
map_centraltoronto = folium.Map(location=[latitude_x, longitude_y], zoom_start=10)

# add markers to map
for lat, lng, bor, nei in zip(partial_data['Latitude'], partial_data['Longitude'], partial_data['Borough'], partial_data['Neighborhood']):
    
    label = '{}, {}'.format(nei, bor)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_centraltoronto)  
    
map_centraltoronto

## Foursquare Credentials and Version

In [80]:
# Foursquare credentials are read from the environment variables
# FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET so that real values never
# have to be typed into (or saved with) the notebook. The placeholder strings
# are used when the variables are not set.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', 'your Foursquare ID') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', 'your Foursquare Secret') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
# Never echo the secret itself into the cell output; show only its length.
print('CLIENT_SECRET:' + '*' * len(CLIENT_SECRET))

Your credentails:
CLIENT_ID: <redacted>
CLIENT_SECRET:<redacted>


## The top 100 venues that are in Central Toronto within a radius of 500 meters

The first neighbourhood in the dataframe:

In [110]:
# Display the 'Neighborhood' value of the row labelled 0 in partial_data.
partial_data.loc[0, 'Neighborhood']

'Summerhill West, South Hill, Rathnelly, Forest Hill SE, Deer Park'

Latitude and longitude values will be obtained.

In [82]:
# Pull the coordinates and the name of the row labelled 0 in partial_data.
neighborhood_name = partial_data.at[0, 'Neighborhood'] # neighborhood name
neighborhood_latitude = partial_data.at[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = partial_data.at[0, 'Longitude'] # neighborhood longitude value

summary_template = 'Latitude and longitude values of {} are {}, {}.'
print(summary_template.format(neighborhood_name, neighborhood_latitude, neighborhood_longitude))

Latitude and longitude values of Summerhill West, South Hill, Rathnelly, Forest Hill SE, Deer Park are 43.68641229999999, -79.4000493.


In [83]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius
# create URL for the Foursquare "explore" endpoint around the chosen neighborhood
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID,
    CLIENT_SECRET,
    VERSION,
    neighborhood_latitude,
    neighborhood_longitude,
    radius,
    LIMIT)

# Display the URL with the secret masked so it is not stored in the notebook
# output; `url` itself is left untouched for the actual request.
url.replace(CLIENT_SECRET, '<CLIENT_SECRET>') if CLIENT_SECRET else url

'https://api.foursquare.com/v2/venues/explore?&client_id=<redacted>&client_secret=<redacted>&v=20180605&ll=43.68641229999999,-79.4000493&radius=500&limit=100'

In [84]:
# Query Foursquare. The timeout keeps the cell from hanging indefinitely and
# raise_for_status() surfaces HTTP errors here instead of as a confusing
# KeyError further down.
explore_response = requests.get(url, timeout=30)
explore_response.raise_for_status()
results = explore_response.json()
results

{'meta': {'code': 200, 'requestId': '5c6f2c274c1f676352af2aa1'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Deer Park',
  'headerFullLocation': 'Deer Park, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 15,
  'suggestedBounds': {'ne': {'lat': 43.690912304499996,
    'lng': -79.39383797359734},
   'sw': {'lat': 43.68191229549999, 'lng': -79.40626062640267}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '55c78cef498ec4095e9fba41',
       'name': 'LCBO',
       'location': {'address': '111 St. Clair West',
        'lat': 43.686990631074885,
        'lng': -79.39923810519545,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.68699063

In [85]:
# function that extracts the category of the venue
def get_category_type(row):
    """Return the name of the first category of a venue row.

    Parameters
    ----------
    row : mapping-like (e.g. a dict or a pandas Series)
        Must provide the venue categories under the key 'categories' or,
        failing that, under 'venue.categories'.

    Returns
    -------
    The 'name' entry of the first category, or None when the category list is
    empty or is not a list/tuple at all (e.g. a missing value).

    Raises
    ------
    KeyError
        If neither 'categories' nor 'venue.categories' is present.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Flattened API responses prefix the column with 'venue.'.
        categories_list = row['venue.categories']

    if not isinstance(categories_list, (list, tuple)) or len(categories_list) == 0:
        return None
    return categories_list[0]['name']

In [86]:
# Venue entries of the first result group returned by the API
venues = results['response']['groups'][0]['items']

# Flatten the nested JSON and keep only the columns of interest
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
flattened_venues = json_normalize(venues)
nearby_venues = flattened_venues.loc[:, filtered_columns]

# Replace each category list by the name of its first category
first_category = nearby_venues.apply(get_category_type, axis=1)
nearby_venues['venue.categories'] = first_category

# Keep only the last dotted component of every column name
short_names = []
for col in nearby_venues.columns:
    short_names.append(col.split(".")[-1])
nearby_venues.columns = short_names

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,LCBO,Convenience Store,43.686991,-79.399238
1,The Market By Longo’s,Supermarket,43.686711,-79.399536
2,Union Social Eatery,American Restaurant,43.687895,-79.394916
3,Daeco Sushi,Sushi Restaurant,43.687838,-79.395652
4,Starbucks,Coffee Shop,43.687101,-79.398612


In [74]:
# Number of rows in nearby_venues, i.e. venues kept from the API response
venue_count = nearby_venues.shape[0]
print('{} venues were returned by Foursquare.'.format(venue_count))

15 venues were returned by Foursquare.


# Exploring neighbourhoods

In [75]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Collect the venues Foursquare recommends around each given location.

    For every (name, latitude, longitude) triple the Foursquare "explore"
    endpoint is queried and one output row is produced per returned venue.
    The name of each location is printed as it is processed.

    Relies on the module-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Iterated in parallel with zip(), so the shortest one determines how
        many locations are queried.
    radius : int, optional
        Value passed as the `radius` query parameter (default 500).

    Returns
    -------
    pandas.DataFrame
        Columns: 'Neighborhood', 'Neighborhood Latitude',
        'Neighborhood Longitude', 'Venue', 'Venue Latitude',
        'Venue Longitude', 'Venue Category'. The frame is empty (but still has
        these columns) when no venue is returned. 'Venue Category' is None for
        a venue without categories.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.
    """
    column_labels = ['Neighborhood',
                     'Neighborhood Latitude',
                     'Neighborhood Longitude',
                     'Venue',
                     'Venue Latitude',
                     'Venue Longitude',
                     'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID,
            CLIENT_SECRET,
            VERSION,
            lat,
            lng,
            radius,
            LIMIT)

        # make the GET request; time out rather than hang, and report HTTP
        # errors directly instead of failing on a missing key below
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for item in results:
            venue = item['venue']
            # a venue may come back without any category
            categories = venue.get('categories') or []
            category_name = categories[0]['name'] if categories else None
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                category_name))

    # Passing the labels to the constructor keeps the columns even when no
    # venue was found (assigning .columns afterwards fails on an empty frame).
    nearby_venues = pd.DataFrame(venue_rows, columns=column_labels)

    return(nearby_venues)

Venues in Central Toronto

In [113]:
# Query the venues around every row of partial_data.
CTvenues = getNearbyVenues(
    partial_data['Neighborhood'],
    partial_data['Latitude'],
    partial_data['Longitude'],
)

Summerhill West, South Hill, Rathnelly, Forest Hill SE, Deer Park
Yorkville, North Midtown, The Annex
Forest Hill West, Forest Hill North
Summerhill East, Moore Park
Roselawn
North Toronto West
Davisville
Davisville North
Lawrence Park


The generated dataframe:

In [112]:
# Size of the venue table (rows, columns), then its first rows
venue_table_shape = CTvenues.shape
print(venue_table_shape)
CTvenues.head()

(114, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Summerhill West, South Hill, Rathnelly, Forest...",43.686412,-79.400049,LCBO,43.686991,-79.399238,Convenience Store
1,"Summerhill West, South Hill, Rathnelly, Forest...",43.686412,-79.400049,The Market By Longo’s,43.686711,-79.399536,Supermarket
2,"Summerhill West, South Hill, Rathnelly, Forest...",43.686412,-79.400049,Union Social Eatery,43.687895,-79.394916,American Restaurant
3,"Summerhill West, South Hill, Rathnelly, Forest...",43.686412,-79.400049,Daeco Sushi,43.687838,-79.395652,Sushi Restaurant
4,"Summerhill West, South Hill, Rathnelly, Forest...",43.686412,-79.400049,Starbucks,43.687101,-79.398612,Coffee Shop


# Analyzing each neighborhood

In [94]:
# one hot encoding
CTonehot = pd.get_dummies(CTvenues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
CTonehot['Neighborhood'] = CTvenues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [CTonehot.columns[-1]] + list(CTonehot.columns[:-1])
CTonehot = CTonehot[fixed_columns]

CTonehot.head()

Unnamed: 0,Neighborhood,American Restaurant,BBQ Joint,Bagel Shop,Breakfast Spot,Brewery,Burger Joint,Bus Line,Café,Chinese Restaurant,Clothing Store,Coffee Shop,Convenience Store,Cosmetics Shop,Dance Studio,Dessert Shop,Dim Sum Restaurant,Diner,Farmers Market,Fast Food Restaurant,Food & Drink Shop,Fried Chicken Joint,Garden,Gourmet Shop,Greek Restaurant,Gym,History Museum,Hotel,Indian Restaurant,Italian Restaurant,Japanese Restaurant,Jewelry Store,Jewish Restaurant,Lake,Light Rail Station,Liquor Store,Metro Station,Mexican Restaurant,Park,Pharmacy,Pizza Place,Playground,Pub,Rental Car Location,Restaurant,Salon / Barbershop,Sandwich Place,Seafood Restaurant,Shoe Store,Spa,Sporting Goods Shop,Sports Bar,Supermarket,Sushi Restaurant,Swim School,Thai Restaurant,Toy / Game Store,Trail,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Yoga Studio
0,"Summerhill West, South Hill, Rathnelly, Forest...",0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"Summerhill West, South Hill, Rathnelly, Forest...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
2,"Summerhill West, South Hill, Rathnelly, Forest...",1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,"Summerhill West, South Hill, Rathnelly, Forest...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
4,"Summerhill West, South Hill, Rathnelly, Forest...",0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


## Grouping rows by neighborhood and taking the mean frequency of occurrence of each category

In [96]:
# Average the one-hot columns per neighborhood; 'Neighborhood' stays a regular column.
CTgrouped = CTonehot.groupby('Neighborhood', as_index=False).mean()
CTgrouped

Unnamed: 0,Neighborhood,American Restaurant,BBQ Joint,Bagel Shop,Breakfast Spot,Brewery,Burger Joint,Bus Line,Café,Chinese Restaurant,Clothing Store,Coffee Shop,Convenience Store,Cosmetics Shop,Dance Studio,Dessert Shop,Dim Sum Restaurant,Diner,Farmers Market,Fast Food Restaurant,Food & Drink Shop,Fried Chicken Joint,Garden,Gourmet Shop,Greek Restaurant,Gym,History Museum,Hotel,Indian Restaurant,Italian Restaurant,Japanese Restaurant,Jewelry Store,Jewish Restaurant,Lake,Light Rail Station,Liquor Store,Metro Station,Mexican Restaurant,Park,Pharmacy,Pizza Place,Playground,Pub,Rental Car Location,Restaurant,Salon / Barbershop,Sandwich Place,Seafood Restaurant,Shoe Store,Spa,Sporting Goods Shop,Sports Bar,Supermarket,Sushi Restaurant,Swim School,Thai Restaurant,Toy / Game Store,Trail,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Yoga Studio
0,Davisville,0.0,0.0,0.0,0.0,0.028571,0.028571,0.0,0.057143,0.0,0.0,0.057143,0.0,0.0,0.0,0.085714,0.0,0.028571,0.028571,0.0,0.0,0.028571,0.0,0.028571,0.028571,0.028571,0.0,0.0,0.028571,0.057143,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.057143,0.057143,0.0,0.0,0.0,0.057143,0.0,0.085714,0.028571,0.0,0.0,0.0,0.0,0.0,0.057143,0.0,0.057143,0.028571,0.0,0.0,0.0,0.0
1,Davisville North,0.0,0.0,0.0,0.125,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.125,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Forest Hill West, Forest Hill North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.25,0.0,0.0,0.0
3,Lawrence Park,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0
4,North Toronto West,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.142857,0.095238,0.0,0.047619,0.0,0.047619,0.0,0.047619,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.047619,0.047619,0.0,0.0,0.0,0.0,0.047619,0.0,0.047619,0.047619,0.0,0.047619,0.047619,0.095238,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619
5,Roselawn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,"Summerhill East, Moore Park",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,"Summerhill West, South Hill, Rathnelly, Forest...",0.066667,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.133333,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.133333,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.066667,0.066667,0.066667,0.0,0.0,0.0,0.0,0.0,0.066667,0.0
8,"Yorkville, North Midtown, The Annex",0.043478,0.043478,0.0,0.0,0.0,0.043478,0.0,0.130435,0.0,0.0,0.130435,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.043478,0.0,0.0,0.0,0.043478,0.0,0.0,0.043478,0.0,0.0,0.043478,0.043478,0.086957,0.0,0.043478,0.0,0.0,0.0,0.130435,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0


In [97]:
# (rows, columns) of the grouped table: one row per neighborhood.
CTgrouped.shape

(9, 61)

## Showing each neighborhood along with the top 5 most common venues

In [98]:
num_top_venues = 5

# For every neighborhood print its venue categories ranked by frequency.
for hood in CTgrouped['Neighborhood']:
    print("----"+hood+"----")

    # Transpose the single matching row into a two-column (venue, freq) table
    hood_profile = CTgrouped[CTgrouped['Neighborhood'] == hood].T.reset_index()
    hood_profile.columns = ['venue','freq']

    # Drop the leading 'Neighborhood' entry, round to two decimals and rank
    ranked = (hood_profile
              .iloc[1:]
              .assign(freq=lambda frame: frame['freq'].astype(float))
              .round({'freq': 2})
              .sort_values('freq', ascending=False)
              .reset_index(drop=True))

    print(ranked.head(num_top_venues))
    print('\n')

----Davisville----
                venue  freq
0      Sandwich Place  0.09
1        Dessert Shop  0.09
2                Café  0.06
3  Italian Restaurant  0.06
4    Sushi Restaurant  0.06


----Davisville North----
            venue  freq
0           Hotel  0.12
1            Park  0.12
2    Dance Studio  0.12
3             Gym  0.12
4  Sandwich Place  0.12


----Forest Hill West, Forest Hill North----
                venue  freq
0       Jewelry Store  0.25
1               Trail  0.25
2    Sushi Restaurant  0.25
3                Park  0.25
4  Salon / Barbershop  0.00


----Lawrence Park----
                venue  freq
0  Dim Sum Restaurant   0.2
1            Bus Line   0.2
2                Park   0.2
3         Swim School   0.2
4                Lake   0.2


----North Toronto West----
                 venue  freq
0       Clothing Store  0.14
1          Coffee Shop  0.10
2  Sporting Goods Shop  0.10
3          Yoga Studio  0.05
4         Dessert Shop  0.05


----Roselawn----
              

## Preparation of pandas dataframe

In [99]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the largest entries of `row`, skipping its first entry.

    The first element of `row` is ignored (callers in this notebook pass rows
    whose first element is the neighborhood name); the remaining elements are
    sorted in descending order and the index labels of the first
    `num_top_venues` of them are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    top_labels = ranked.index.values
    return top_labels[:num_top_venues]

In [102]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues:
# '1st Most Common Venue', '2nd Most Common Venue', ...
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    rank = int(ind) + 1
    # 11, 12 and 13 (and 111, ...) take 'th'; otherwise the last digit decides.
    if rank % 100 in (11, 12, 13) or rank % 10 not in (1, 2, 3):
        suffix = 'th'
    else:
        suffix = indicators[rank % 10 - 1]
    columns.append('{}{} Most Common Venue'.format(rank, suffix))

# create a new dataframe with one row per neighborhood of CTgrouped
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = CTgrouped['Neighborhood']

# fill the ranked-venue columns row by row
for ind in np.arange(CTgrouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(CTgrouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Davisville,Sandwich Place,Dessert Shop,Coffee Shop,Café,Restaurant,Pizza Place,Italian Restaurant,Thai Restaurant,Sushi Restaurant,Pharmacy
1,Davisville North,Dance Studio,Park,Hotel,Breakfast Spot,Gym,Burger Joint,Sandwich Place,Food & Drink Shop,Farmers Market,Fast Food Restaurant
2,"Forest Hill West, Forest Hill North",Trail,Jewelry Store,Sushi Restaurant,Park,Yoga Studio,Fried Chicken Joint,Diner,Farmers Market,Fast Food Restaurant,Food & Drink Shop
3,Lawrence Park,Dim Sum Restaurant,Lake,Swim School,Bus Line,Park,Yoga Studio,History Museum,Gym,Greek Restaurant,Gourmet Shop
4,North Toronto West,Clothing Store,Coffee Shop,Sporting Goods Shop,Yoga Studio,Fast Food Restaurant,Diner,Metro Station,Mexican Restaurant,Park,Dessert Shop


# Clustering neighbourhoods

## *k*-means clustering method to cluster the neighborhood into 5 clusters

In [103]:
# set number of clusters
kclusters = 5

CTgrouped_clustering = CTgrouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(CTgrouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 3, 4, 0, 2, 1, 0, 0], dtype=int32)

In [104]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

CTmerged = partial_data

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
CTmerged = CTmerged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

CTmerged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4V,Central Toronto,"Summerhill West, South Hill, Rathnelly, Forest...",43.6864,-79.4,0,Pub,Coffee Shop,American Restaurant,Supermarket,Convenience Store,Light Rail Station,Spa,Sports Bar,Fried Chicken Joint,Sushi Restaurant
1,M5R,Central Toronto,"Yorkville, North Midtown, The Annex",43.6727,-79.4057,0,Coffee Shop,Café,Sandwich Place,Pizza Place,American Restaurant,Indian Restaurant,Jewish Restaurant,Liquor Store,Park,Pharmacy
2,M5P,Central Toronto,"Forest Hill West, Forest Hill North",43.6969,-79.4113,3,Trail,Jewelry Store,Sushi Restaurant,Park,Yoga Studio,Fried Chicken Joint,Diner,Farmers Market,Fast Food Restaurant,Food & Drink Shop
3,M4T,Central Toronto,"Summerhill East, Moore Park",43.6896,-79.3832,1,Restaurant,Playground,Yoga Studio,Hotel,History Museum,Gym,Greek Restaurant,Gourmet Shop,Garden,Fried Chicken Joint
4,M5N,Central Toronto,Roselawn,43.7117,-79.4169,2,Garden,Yoga Studio,Vietnamese Restaurant,Indian Restaurant,Hotel,History Museum,Gym,Greek Restaurant,Gourmet Shop,Fried Chicken Joint


In [107]:
# create map
map_clusters = folium.Map(location=[latitude_x, longitude_y], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(CTmerged['Latitude'], CTmerged['Longitude'], CTmerged['Neighborhood'], CTmerged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# Examine Clusters

## Cluster 1

In [116]:
# Rows with cluster label 0, showing column 1 and every column from position 5 onwards.
cluster_columns = CTmerged.columns[[1] + list(range(5, CTmerged.shape[1]))]
in_cluster = CTmerged['Cluster Labels'] == 0
CTmerged.loc[in_cluster, cluster_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Central Toronto,0,Pub,Coffee Shop,American Restaurant,Supermarket,Convenience Store,Light Rail Station,Spa,Sports Bar,Fried Chicken Joint,Sushi Restaurant
1,Central Toronto,0,Coffee Shop,Café,Sandwich Place,Pizza Place,American Restaurant,Indian Restaurant,Jewish Restaurant,Liquor Store,Park,Pharmacy
5,Central Toronto,0,Clothing Store,Coffee Shop,Sporting Goods Shop,Yoga Studio,Fast Food Restaurant,Diner,Metro Station,Mexican Restaurant,Park,Dessert Shop
6,Central Toronto,0,Sandwich Place,Dessert Shop,Coffee Shop,Café,Restaurant,Pizza Place,Italian Restaurant,Thai Restaurant,Sushi Restaurant,Pharmacy
7,Central Toronto,0,Dance Studio,Park,Hotel,Breakfast Spot,Gym,Burger Joint,Sandwich Place,Food & Drink Shop,Farmers Market,Fast Food Restaurant


## Cluster 2

In [117]:
# Rows with cluster label 1, showing column 1 and every column from position 5 onwards.
cluster_columns = CTmerged.columns[[1] + list(range(5, CTmerged.shape[1]))]
in_cluster = CTmerged['Cluster Labels'] == 1
CTmerged.loc[in_cluster, cluster_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,Central Toronto,1,Restaurant,Playground,Yoga Studio,Hotel,History Museum,Gym,Greek Restaurant,Gourmet Shop,Garden,Fried Chicken Joint


## Cluster 3

In [119]:
# Rows with cluster label 2, showing column 1 and every column from position 5 onwards.
cluster_columns = CTmerged.columns[[1] + list(range(5, CTmerged.shape[1]))]
in_cluster = CTmerged['Cluster Labels'] == 2
CTmerged.loc[in_cluster, cluster_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Central Toronto,2,Garden,Yoga Studio,Vietnamese Restaurant,Indian Restaurant,Hotel,History Museum,Gym,Greek Restaurant,Gourmet Shop,Fried Chicken Joint


## Cluster 4

In [122]:
# Rows with cluster label 3, showing column 1 and every column from position 5 onwards.
cluster_columns = CTmerged.columns[[1] + list(range(5, CTmerged.shape[1]))]
in_cluster = CTmerged['Cluster Labels'] == 3
CTmerged.loc[in_cluster, cluster_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Central Toronto,3,Trail,Jewelry Store,Sushi Restaurant,Park,Yoga Studio,Fried Chicken Joint,Diner,Farmers Market,Fast Food Restaurant,Food & Drink Shop


## Cluster 5

In [123]:
# Rows with cluster label 4, showing column 1 and every column from position 5 onwards.
cluster_columns = CTmerged.columns[[1] + list(range(5, CTmerged.shape[1]))]
in_cluster = CTmerged['Cluster Labels'] == 4
CTmerged.loc[in_cluster, cluster_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,Central Toronto,4,Dim Sum Restaurant,Lake,Swim School,Bus Line,Park,Yoga Studio,History Museum,Gym,Greek Restaurant,Gourmet Shop
