<H1>
   WORKBOOK TO ANALYZE THE TORONTO NEIGHBORHOOD DATA <br>
    Matt Muenster

In [10]:
import pandas as pd
import numpy as np
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
df=pd.read_html(url)

<h2> Use only the first table

In [11]:
df=df[0]

<h2>Rename the Postcode column

In [12]:
df=df.rename(columns={"Postcode":"PostalCode"})

<h3>Remove the boroughs that are not assigned

In [13]:
df=df[df.Borough != 'Not assigned']

## Use borough for neighborhood when it is not assigned

In [14]:

df['Neighbourhood']=np.where(df['Neighbourhood'] == 'Not assigned', df['Borough'], df['Neighbourhood'] )

Group by postal code and aggregate neighborhoods into a list


In [15]:
g1=df.groupby(['PostalCode','Borough'])
g2=g1['Neighbourhood'].apply(list)
g3=pd.DataFrame(g2).reset_index()
g3.head(10)

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M1B,Scarborough,"[Rouge, Malvern]"
1,M1C,Scarborough,"[Highland Creek, Rouge Hill, Port Union]"
2,M1E,Scarborough,"[Guildwood, Morningside, West Hill]"
3,M1G,Scarborough,[Woburn]
4,M1H,Scarborough,[Cedarbrae]
5,M1J,Scarborough,[Scarborough Village]
6,M1K,Scarborough,"[East Birchmount Park, Ionview, Kennedy Park]"
7,M1L,Scarborough,"[Clairlea, Golden Mile, Oakridge]"
8,M1M,Scarborough,"[Cliffcrest, Cliffside, Scarborough Village West]"
9,M1N,Scarborough,"[Birch Cliff, Cliffside West]"


In [16]:
g3.shape

(103, 3)

# Add columns for the lat/long

In [17]:
g3['Latitude']='None'
g3['Longitude']='None'
g3.head(10)

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"[Rouge, Malvern]",,
1,M1C,Scarborough,"[Highland Creek, Rouge Hill, Port Union]",,
2,M1E,Scarborough,"[Guildwood, Morningside, West Hill]",,
3,M1G,Scarborough,[Woburn],,
4,M1H,Scarborough,[Cedarbrae],,
5,M1J,Scarborough,[Scarborough Village],,
6,M1K,Scarborough,"[East Birchmount Park, Ionview, Kennedy Park]",,
7,M1L,Scarborough,"[Clairlea, Golden Mile, Oakridge]",,
8,M1M,Scarborough,"[Cliffcrest, Cliffside, Scarborough Village West]",,
9,M1N,Scarborough,"[Birch Cliff, Cliffside West]",,


# Import the data for the lat/long

In [18]:
import io
import requests

url = "http://cocl.us/Geospatial_data"
s = requests.get(url).content
ds = pd.read_csv(io.StringIO(s.decode('utf-8')))
ds.head()


Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


# Add the lat/long to the dataframe

In [19]:
g3['Latitude']=ds['Latitude']
g3['Longitude']=ds['Longitude']
g3.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"[Rouge, Malvern]",43.806686,-79.194353
1,M1C,Scarborough,"[Highland Creek, Rouge Hill, Port Union]",43.784535,-79.160497
2,M1E,Scarborough,"[Guildwood, Morningside, West Hill]",43.763573,-79.188711
3,M1G,Scarborough,[Woburn],43.770992,-79.216917
4,M1H,Scarborough,[Cedarbrae],43.773136,-79.239476


# Get coordinates of Toronto for plotting of map

In [22]:
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
address = 'Toronto, CA'

geolocator = Nominatim(user_agent="tor_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


# Create a map of neighborhoods in Toronto

In [23]:
#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

In [26]:
# create map of New York using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(g3['Latitude'], g3['Longitude'], g3['Borough'], g3['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

# Define Foursquare Credentials and Version

In [30]:
CLIENT_ID = 'TQ1P524QGYGTUVYLMLBK5SEKPSNRH3BKLGT3ZKT2CGEKAOLP' # your Foursquare ID
CLIENT_SECRET = 'AD54RZRV2HO0WBMX55DCTQDAGQT5NJTOU3JAQBQUWDFQVJ2I' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: TQ1P524QGYGTUVYLMLBK5SEKPSNRH3BKLGT3ZKT2CGEKAOLP
CLIENT_SECRET:AD54RZRV2HO0WBMX55DCTQDAGQT5NJTOU3JAQBQUWDFQVJ2I


In [37]:
g3.loc[0, 'PostalCode']
neighborhood_latitude = g3.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = g3.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = g3.loc[0, 'Borough'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))
# type your answer here
radius=1000
LIMIT=100
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url # display URL

Latitude and longitude values of Scarborough are 43.806686299999996, -79.19435340000001.


'https://api.foursquare.com/v2/venues/explore?&client_id=TQ1P524QGYGTUVYLMLBK5SEKPSNRH3BKLGT3ZKT2CGEKAOLP&client_secret=AD54RZRV2HO0WBMX55DCTQDAGQT5NJTOU3JAQBQUWDFQVJ2I&v=20180605&ll=43.806686299999996,-79.19435340000001&radius=1000&limit=100'

In [38]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5db8569953159300397a80f5'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Malvern',
  'headerFullLocation': 'Malvern, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 17,
  'suggestedBounds': {'ne': {'lat': 43.81568630900001,
    'lng': -79.18190576146081},
   'sw': {'lat': 43.797686290999984, 'lng': -79.20680103853921}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4d669cba83865481c948fa53',
       'name': 'Images Salon & Spa',
       'location': {'address': '8130 Sheppard Ave E',
        'crossStreet': 'Morningside Ave',
        'lat': 43.80228301948931,
        'lng': -79.19856472801668,
        'labeledLatLngs'

In [39]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']


In [43]:
import json # library to handle JSON files
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Images Salon & Spa,Spa,43.802283,-79.198565
1,Caribbean Wave,Caribbean Restaurant,43.798558,-79.195777
2,Wendy's,Fast Food Restaurant,43.802008,-79.19808
3,Wendy's,Fast Food Restaurant,43.807448,-79.199056
4,Staples Morningside,Paper / Office Supplies Store,43.800285,-79.196607


In [44]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

17 venues were returned by Foursquare.


# Let's create a function to repeat the same process to all the neighborhoods in Manhattan

In [46]:

def getNearbyVenues(names, latitudes, longitudes, radius=1000):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

# Now write the code to run the above function on each neighborhood and create a new dataframe called toronto_venues.

In [48]:
toronto_venues = getNearbyVenues(names=g3['PostalCode'],
                        latitudes=g3['Latitude'],
                        longitudes=g3['Longitude'])

M1B
M1C
M1E
M1G
M1H
M1J
M1K
M1L
M1M
M1N
M1P
M1R
M1S
M1T
M1V
M1W
M1X
M2H
M2J
M2K
M2L
M2M
M2N
M2P
M2R
M3A
M3B
M3C
M3H
M3J
M3K
M3L
M3M
M3N
M4A
M4B
M4C
M4E
M4G
M4H
M4J
M4K
M4L
M4M
M4N
M4P
M4R
M4S
M4T
M4V
M4W
M4X
M4Y
M5A
M5B
M5C
M5E
M5G
M5H
M5J
M5K
M5L
M5M
M5N
M5P
M5R
M5S
M5T
M5V
M5W
M5X
M6A
M6B
M6C
M6E
M6G
M6H
M6J
M6K
M6L
M6M
M6N
M6P
M6R
M6S
M7A
M7R
M7Y
M8V
M8W
M8X
M8Y
M8Z
M9A
M9B
M9C
M9L
M9M
M9N
M9P
M9R
M9V
M9W


In [49]:
# Let's check the size of the resulting dataframe
print(toronto_venues.shape)
toronto_venues.head()


(4892, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,M1B,43.806686,-79.194353,Images Salon & Spa,43.802283,-79.198565,Spa
1,M1B,43.806686,-79.194353,Caribbean Wave,43.798558,-79.195777,Caribbean Restaurant
2,M1B,43.806686,-79.194353,Wendy's,43.802008,-79.19808,Fast Food Restaurant
3,M1B,43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant
4,M1B,43.806686,-79.194353,Staples Morningside,43.800285,-79.196607,Paper / Office Supplies Store


# Let's check how many venues were returned for each neighborhood

In [50]:


toronto_venues.groupby('Neighborhood').count()


Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
M1B,17,17,17,17,17,17
M1C,5,5,5,5,5,5
M1E,23,23,23,23,23,23
M1G,9,9,9,9,9,9
M1H,28,28,28,28,28,28
M1J,12,12,12,12,12,12
M1K,29,29,29,29,29,29
M1L,31,31,31,31,31,31
M1M,11,11,11,11,11,11
M1N,14,14,14,14,14,14


# Let's find out how many unique categories can be curated from all the returned venues

In [51]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 329 uniques categories.


# Analyze the data for each neighborhood exactly as we did in the exercise for NYC

In [54]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()


Unnamed: 0,Zoo,ATM,Accessories Store,Afghan Restaurant,Airport,Airport Lounge,American Restaurant,Amphitheater,Animal Shelter,Antique Shop,...,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [55]:
toronto_onehot.shape

(4892, 329)

In [57]:

toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped


Unnamed: 0,Neighborhood,Zoo,ATM,Accessories Store,Afghan Restaurant,Airport,Airport Lounge,American Restaurant,Amphitheater,Animal Shelter,...,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,M1B,0.00,0.0,0.00,0.0,0.000000,0.0,0.000000,0.00,0.0,...,0.000000,0.000000,0.000000,0.0,0.00,0.00,0.000000,0.000000,0.000000,0.000000
1,M1C,0.00,0.0,0.00,0.0,0.000000,0.0,0.000000,0.00,0.0,...,0.000000,0.000000,0.000000,0.0,0.00,0.00,0.000000,0.000000,0.000000,0.000000
2,M1E,0.00,0.0,0.00,0.0,0.000000,0.0,0.000000,0.00,0.0,...,0.000000,0.000000,0.000000,0.0,0.00,0.00,0.000000,0.000000,0.000000,0.000000
3,M1G,0.00,0.0,0.00,0.0,0.000000,0.0,0.000000,0.00,0.0,...,0.000000,0.000000,0.000000,0.0,0.00,0.00,0.000000,0.000000,0.000000,0.000000
4,M1H,0.00,0.0,0.00,0.0,0.000000,0.0,0.000000,0.00,0.0,...,0.000000,0.000000,0.000000,0.0,0.00,0.00,0.000000,0.035714,0.000000,0.035714
5,M1J,0.00,0.0,0.00,0.0,0.000000,0.0,0.000000,0.00,0.0,...,0.000000,0.000000,0.000000,0.0,0.00,0.00,0.000000,0.000000,0.000000,0.000000
6,M1K,0.00,0.0,0.00,0.0,0.000000,0.0,0.000000,0.00,0.0,...,0.000000,0.000000,0.000000,0.0,0.00,0.00,0.000000,0.000000,0.000000,0.000000
7,M1L,0.00,0.0,0.00,0.0,0.000000,0.0,0.000000,0.00,0.0,...,0.000000,0.000000,0.000000,0.0,0.00,0.00,0.000000,0.000000,0.000000,0.000000
8,M1M,0.00,0.0,0.00,0.0,0.000000,0.0,0.000000,0.00,0.0,...,0.000000,0.000000,0.000000,0.0,0.00,0.00,0.000000,0.000000,0.000000,0.000000
9,M1N,0.00,0.0,0.00,0.0,0.000000,0.0,0.000000,0.00,0.0,...,0.000000,0.000000,0.000000,0.0,0.00,0.00,0.000000,0.000000,0.000000,0.000000


In [78]:

num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')
    
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

----M1B----
                  venue  freq
0  Fast Food Restaurant  0.18
1           Bus Station  0.06
2            Hobby Shop  0.06
3                   Gym  0.06
4     Martial Arts Dojo  0.06


----M1C----
                venue  freq
0                Park   0.2
1        Burger Joint   0.2
2          Playground   0.2
3  Italian Restaurant   0.2
4      Breakfast Spot   0.2


----M1E----
                  venue  freq
0           Pizza Place  0.17
1  Fast Food Restaurant  0.09
2           Coffee Shop  0.09
3                  Bank  0.04
4        Sandwich Place  0.04


----M1G----
                venue  freq
0         Coffee Shop  0.22
1                Park  0.22
2  Chinese Restaurant  0.11
3   Electronics Store  0.11
4   Indian Restaurant  0.11


----M1H----
               venue  freq
0             Bakery  0.11
1        Coffee Shop  0.11
2           Pharmacy  0.07
3  Indian Restaurant  0.07
4        Yoga Studio  0.04


----M1J----
                  venue  freq
0  Fast Food Restaurant  0.25


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Fast Food Restaurant,Sandwich Place,Gym,Bus Station,Martial Arts Dojo,Fruit & Vegetable Store,Caribbean Restaurant,Bakery,Chinese Restaurant,Paper / Office Supplies Store
1,M1C,Breakfast Spot,Park,Burger Joint,Playground,Italian Restaurant,Event Space,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Elementary School
2,M1E,Pizza Place,Fast Food Restaurant,Coffee Shop,Grocery Store,Fried Chicken Joint,Bank,Burger Joint,Discount Store,Sandwich Place,Chinese Restaurant
3,M1G,Coffee Shop,Park,Chinese Restaurant,Fast Food Restaurant,Electronics Store,Indian Restaurant,Pharmacy,Empanada Restaurant,Drugstore,Dumpling Restaurant
4,M1H,Coffee Shop,Bakery,Pharmacy,Indian Restaurant,Yoga Studio,Athletics & Sports,Pizza Place,Chinese Restaurant,Caribbean Restaurant,Music Store


# Cluster the data exactly as we did in the exercise for NYC

In [79]:
# import k-means from clustering stage
from sklearn.cluster import KMeans

# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 2, 0, 0, 0, 0, 0, 0, 0, 4], dtype=int32)

In [81]:


# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

neighborhoods_venues_sorted

Unnamed: 0,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,0,M1B,Fast Food Restaurant,Sandwich Place,Gym,Bus Station,Martial Arts Dojo,Fruit & Vegetable Store,Caribbean Restaurant,Bakery,Chinese Restaurant,Paper / Office Supplies Store
1,2,M1C,Breakfast Spot,Park,Burger Joint,Playground,Italian Restaurant,Event Space,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Elementary School
2,0,M1E,Pizza Place,Fast Food Restaurant,Coffee Shop,Grocery Store,Fried Chicken Joint,Bank,Burger Joint,Discount Store,Sandwich Place,Chinese Restaurant
3,0,M1G,Coffee Shop,Park,Chinese Restaurant,Fast Food Restaurant,Electronics Store,Indian Restaurant,Pharmacy,Empanada Restaurant,Drugstore,Dumpling Restaurant
4,0,M1H,Coffee Shop,Bakery,Pharmacy,Indian Restaurant,Yoga Studio,Athletics & Sports,Pizza Place,Chinese Restaurant,Caribbean Restaurant,Music Store
5,0,M1J,Fast Food Restaurant,Restaurant,Japanese Restaurant,Grocery Store,Sandwich Place,Coffee Shop,Train Station,Pizza Place,Bowling Alley,Convenience Store
6,0,M1K,Coffee Shop,Chinese Restaurant,Discount Store,Convenience Store,Grocery Store,Fast Food Restaurant,Bus Line,Bus Station,Metro Station,Light Rail Station
7,0,M1L,Intersection,Coffee Shop,Bakery,Fast Food Restaurant,Diner,Bus Line,Pharmacy,Soccer Field,Pub,General Entertainment
8,0,M1M,Fast Food Restaurant,Pizza Place,Beach,Sports Bar,Burger Joint,Cajun / Creole Restaurant,Hardware Store,Park,Yoga Studio,Empanada Restaurant
9,4,M1N,Diner,Park,Café,Asian Restaurant,Gym,Dessert Shop,Gym Pool,Restaurant,General Entertainment,College Stadium


In [87]:
toronto_merged = g3
toronto_merged

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"[Rouge, Malvern]",43.806686,-79.194353
1,M1C,Scarborough,"[Highland Creek, Rouge Hill, Port Union]",43.784535,-79.160497
2,M1E,Scarborough,"[Guildwood, Morningside, West Hill]",43.763573,-79.188711
3,M1G,Scarborough,[Woburn],43.770992,-79.216917
4,M1H,Scarborough,[Cedarbrae],43.773136,-79.239476
5,M1J,Scarborough,[Scarborough Village],43.744734,-79.239476
6,M1K,Scarborough,"[East Birchmount Park, Ionview, Kennedy Park]",43.727929,-79.262029
7,M1L,Scarborough,"[Clairlea, Golden Mile, Oakridge]",43.711112,-79.284577
8,M1M,Scarborough,"[Cliffcrest, Cliffside, Scarborough Village West]",43.716316,-79.239476
9,M1N,Scarborough,"[Birch Cliff, Cliffside West]",43.692657,-79.264848


In [88]:


# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='PostalCode')

#toronto_merged['Cluster Labels']=int(toronto_merged['Cluster Labels'])

toronto_merged # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"[Rouge, Malvern]",43.806686,-79.194353,0.0,Fast Food Restaurant,Sandwich Place,Gym,Bus Station,Martial Arts Dojo,Fruit & Vegetable Store,Caribbean Restaurant,Bakery,Chinese Restaurant,Paper / Office Supplies Store
1,M1C,Scarborough,"[Highland Creek, Rouge Hill, Port Union]",43.784535,-79.160497,2.0,Breakfast Spot,Park,Burger Joint,Playground,Italian Restaurant,Event Space,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Elementary School
2,M1E,Scarborough,"[Guildwood, Morningside, West Hill]",43.763573,-79.188711,0.0,Pizza Place,Fast Food Restaurant,Coffee Shop,Grocery Store,Fried Chicken Joint,Bank,Burger Joint,Discount Store,Sandwich Place,Chinese Restaurant
3,M1G,Scarborough,[Woburn],43.770992,-79.216917,0.0,Coffee Shop,Park,Chinese Restaurant,Fast Food Restaurant,Electronics Store,Indian Restaurant,Pharmacy,Empanada Restaurant,Drugstore,Dumpling Restaurant
4,M1H,Scarborough,[Cedarbrae],43.773136,-79.239476,0.0,Coffee Shop,Bakery,Pharmacy,Indian Restaurant,Yoga Studio,Athletics & Sports,Pizza Place,Chinese Restaurant,Caribbean Restaurant,Music Store
5,M1J,Scarborough,[Scarborough Village],43.744734,-79.239476,0.0,Fast Food Restaurant,Restaurant,Japanese Restaurant,Grocery Store,Sandwich Place,Coffee Shop,Train Station,Pizza Place,Bowling Alley,Convenience Store
6,M1K,Scarborough,"[East Birchmount Park, Ionview, Kennedy Park]",43.727929,-79.262029,0.0,Coffee Shop,Chinese Restaurant,Discount Store,Convenience Store,Grocery Store,Fast Food Restaurant,Bus Line,Bus Station,Metro Station,Light Rail Station
7,M1L,Scarborough,"[Clairlea, Golden Mile, Oakridge]",43.711112,-79.284577,0.0,Intersection,Coffee Shop,Bakery,Fast Food Restaurant,Diner,Bus Line,Pharmacy,Soccer Field,Pub,General Entertainment
8,M1M,Scarborough,"[Cliffcrest, Cliffside, Scarborough Village West]",43.716316,-79.239476,0.0,Fast Food Restaurant,Pizza Place,Beach,Sports Bar,Burger Joint,Cajun / Creole Restaurant,Hardware Store,Park,Yoga Studio,Empanada Restaurant
9,M1N,Scarborough,"[Birch Cliff, Cliffside West]",43.692657,-79.264848,4.0,Diner,Park,Café,Asian Restaurant,Gym,Dessert Shop,Gym Pool,Restaurant,General Entertainment,College Stadium


In [99]:
toronto_merged.dropna(inplace=True)
toronto_merged=toronto_merged.astype({'Cluster Labels':'int32'})

toronto_merged # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"[Rouge, Malvern]",43.806686,-79.194353,0,Fast Food Restaurant,Sandwich Place,Gym,Bus Station,Martial Arts Dojo,Fruit & Vegetable Store,Caribbean Restaurant,Bakery,Chinese Restaurant,Paper / Office Supplies Store
1,M1C,Scarborough,"[Highland Creek, Rouge Hill, Port Union]",43.784535,-79.160497,2,Breakfast Spot,Park,Burger Joint,Playground,Italian Restaurant,Event Space,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Elementary School
2,M1E,Scarborough,"[Guildwood, Morningside, West Hill]",43.763573,-79.188711,0,Pizza Place,Fast Food Restaurant,Coffee Shop,Grocery Store,Fried Chicken Joint,Bank,Burger Joint,Discount Store,Sandwich Place,Chinese Restaurant
3,M1G,Scarborough,[Woburn],43.770992,-79.216917,0,Coffee Shop,Park,Chinese Restaurant,Fast Food Restaurant,Electronics Store,Indian Restaurant,Pharmacy,Empanada Restaurant,Drugstore,Dumpling Restaurant
4,M1H,Scarborough,[Cedarbrae],43.773136,-79.239476,0,Coffee Shop,Bakery,Pharmacy,Indian Restaurant,Yoga Studio,Athletics & Sports,Pizza Place,Chinese Restaurant,Caribbean Restaurant,Music Store
5,M1J,Scarborough,[Scarborough Village],43.744734,-79.239476,0,Fast Food Restaurant,Restaurant,Japanese Restaurant,Grocery Store,Sandwich Place,Coffee Shop,Train Station,Pizza Place,Bowling Alley,Convenience Store
6,M1K,Scarborough,"[East Birchmount Park, Ionview, Kennedy Park]",43.727929,-79.262029,0,Coffee Shop,Chinese Restaurant,Discount Store,Convenience Store,Grocery Store,Fast Food Restaurant,Bus Line,Bus Station,Metro Station,Light Rail Station
7,M1L,Scarborough,"[Clairlea, Golden Mile, Oakridge]",43.711112,-79.284577,0,Intersection,Coffee Shop,Bakery,Fast Food Restaurant,Diner,Bus Line,Pharmacy,Soccer Field,Pub,General Entertainment
8,M1M,Scarborough,"[Cliffcrest, Cliffside, Scarborough Village West]",43.716316,-79.239476,0,Fast Food Restaurant,Pizza Place,Beach,Sports Bar,Burger Joint,Cajun / Creole Restaurant,Hardware Store,Park,Yoga Studio,Empanada Restaurant
9,M1N,Scarborough,"[Birch Cliff, Cliffside West]",43.692657,-79.264848,4,Diner,Park,Café,Asian Restaurant,Gym,Dessert Shop,Gym Pool,Restaurant,General Entertainment,College Stadium


In [100]:
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['PostalCode'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# The primary observation is that the PostalCodes of Toronto have only two main clusterings based on the businesses in that area.