In [3]:
# Web scraping stuff
import requests
!conda install -c anaconda beautifulsoup4 --yes
from bs4 import BeautifulSoup

import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

Solving environment: done


  current version: 4.5.11
  latest version: 4.8.0

Please update conda by running

    $ conda update -n base -c defaults conda



## Package Plan ##

  environment location: /home/jupyterlab/conda/envs/python

  added / updated specs: 
    - beautifulsoup4


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    certifi-2019.11.28         |           py36_0         156 KB  anaconda

The following packages will be UPDATED:

    certifi: 2019.11.28-py36_0 conda-forge --> 2019.11.28-py36_0 anaconda
    openssl: 1.1.1d-h516909a_0 conda-forge --> 1.1.1-h7b6447c_0  anaconda


Downloading and Extracting Packages
certifi-2019.11.28   | 156 KB    | ##################################### | 100% 
Preparing transaction: done
Verifying transaction: done
Executing transaction: done
Solving environment: done


  current version: 4.5.11
  latest version: 4.8.0

Please update conda b

In [4]:
!conda install -c conda-forge geopy --yes
!conda install -c conda-forge geocoder --yes
import geocoder
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

Solving environment: done


  current version: 4.5.11
  latest version: 4.8.0

Please update conda by running

    $ conda update -n base -c defaults conda



## Package Plan ##

  environment location: /home/jupyterlab/conda/envs/python

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geopy-1.20.0               |             py_0          57 KB  conda-forge
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    ------------------------------------------------------------
                                           Total:          91 KB

The following NEW packages will be INSTALLED:

    geographiclib: 1.50-py_0   conda-forge
    geopy:         1.20.0-py_0 conda-forge


Downloading and Extracting Packages
geopy-1.20.0         | 57 KB     | ##################################### | 100% 
geographiclib-1.50   | 34 KB     | ###

# Pulling Postal Data from Wikipedia
using this tutorial: [bs4](https://medium.com/analytics-vidhya/web-scraping-wiki-tables-using-beautifulsoup-and-python-6b9ea26d8722)

## Pulling from wiki

In [6]:
website_url_str = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
website_url = requests.get(website_url_str).text

#soup = BeautifulSoup(website_url,'lxml')
soup = BeautifulSoup(website_url)

## Creating raw table

In [7]:
my_table = soup.find('table',{'class':'wikitable sortable'})

table_contents = my_table.find_all('td')
col = 0
post_codes = []
boroughs = []
neighborhoods = []
for tag in table_contents:
    if col == 0:
        post_codes.append(tag.get_text())
    elif col == 1:
        boroughs.append(tag.get_text())
    elif col == 2:
        neighborhoods.append(tag.get_text().strip())
    else:
        print('something is wrong')
        
    col = col + 1
    
    if col >= 3:
        col = 0
        
df = pd.DataFrame()
df['PostalCode'] = post_codes
df['Boroughs'] = boroughs
df['Neighborhoods'] = neighborhoods

## Cleaning up the table

In [8]:
# drop rows with unassigned boroughs
df.Boroughs = df['Boroughs'].replace('Not assigned', np.nan)
df.dropna(subset=['Boroughs'], inplace=True)
df.reset_index(drop=True, inplace=True)

In [9]:
# fill missing neighborhood values with Borough value on same row
df.Neighborhoods = df.Neighborhoods.replace('Not assigned', np.nan)
df.Neighborhoods.fillna(df.Boroughs, inplace=True)

In [10]:
# combining Neighborhood text into comma seperated strings for repeated Postal Codes
df = df.groupby(['PostalCode', 'Boroughs'])['Neighborhoods'].apply(', '.join).reset_index()

# Obtaining Latitudes and Longitudes

In [11]:
#this takes forever, comenting out and importing from csv

'''
lat_lng_coords = None
postal_code = df.PostalCode.values[0]
# loop until you get the coordinates
while(lat_lng_coords is None):
  g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))
  lat_lng_coords = g.latlng

latitude = lat_lng_coords[0]
longitude = lat_lng_coords[1]
'''

"\nlat_lng_coords = None\npostal_code = df.PostalCode.values[0]\n# loop until you get the coordinates\nwhile(lat_lng_coords is None):\n  g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))\n  lat_lng_coords = g.latlng\n\nlatitude = lat_lng_coords[0]\nlongitude = lat_lng_coords[1]\n"

In [12]:
geo_df = pd.read_csv("Geospatial_Coordinates.csv")
geo_df.rename(columns={'Postal Code': 'PostalCode'}, inplace=True)

In [13]:
df = df.merge(geo_df, left_on='PostalCode', right_on='PostalCode')
df.head()

Unnamed: 0,PostalCode,Boroughs,Neighborhoods,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


# Exploring the Map

## Setting up foursquare

In [14]:
info_file = "foursquare_info.sec"
info_df = pd.read_csv(info_file)
CLIENT_ID = info_df.ID.values[0] # your Foursquare ID
CLIENT_SECRET = info_df.SECRET.values[0] # your Foursquare Secret
VERSION = '20180605' # Foursquare API version


In [15]:
# test one location
ind = 3
name = df.loc[ind, 'Neighborhoods']
lat = df.loc[ind, 'Latitude']
lon = df.loc[ind, 'Longitude']

radius = 500
LIMIT = 100
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    lat, 
    lon, 
    radius, 
    LIMIT)
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5e18b6815fb726001b60d7e7'},
 'response': {'headerLocation': 'Toronto',
  'headerFullLocation': 'Toronto',
  'headerLocationGranularity': 'city',
  'totalResults': 4,
  'suggestedBounds': {'ne': {'lat': 43.7754921045, 'lng': -79.21069729639068},
   'sw': {'lat': 43.7664920955, 'lng': -79.22313750360935}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4cc1d28c06c254815ac18547',
       'name': 'Starbucks',
       'location': {'address': '300 Borough Dr',
        'crossStreet': 'Scarborough Town Centre',
        'lat': 43.770037201625215,
        'lng': -79.22115586641958,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.770037201625215,
          'lng': -79.22115586641958}],
        'distance': 356,
        'cc': 'CA

## Grab foursquare info for all locations

function to grab locations in a loop

In [16]:
# this is my version of the venue loader, with the option to load from previous data
def getNearbyVenues(names, latitudes, longitudes, radius=500, load_existing_pickle_filename=None):
    
    if load_existing_pickle_filename == None:
    
        venues_list=[]
        for name, lat, lng in zip(names, latitudes, longitudes):
            print(name)

            # create the API request URL
            url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
                CLIENT_ID, 
                CLIENT_SECRET, 
                VERSION, 
                lat, 
                lng, 
                radius, 
                LIMIT)

            # make the GET request
            results = requests.get(url).json()["response"]['groups'][0]['items']

            # return only relevant information for each nearby venue
            venues_list.append([(
                name, 
                lat, 
                lng, 
                v['venue']['name'], 
                v['venue']['location']['lat'], 
                v['venue']['location']['lng'],  
                v['venue']['categories'][0]['name']) for v in results])

        nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
        nearby_venues.columns = ['Neighborhood', 
                      'Neighborhood Latitude', 
                      'Neighborhood Longitude', 
                      'Venue', 
                      'Venue Latitude', 
                      'Venue Longitude', 
                      'Venue Category']
    else:
        print(f"loading venues from previously constructed dataframe in {load_existing_pickle_filename}")
        nearby_venues = pd.read_pickle(load_existing_pickle_filename)
            
    
    return(nearby_venues)

In [17]:
toronto_venues = getNearbyVenues(names=df.Neighborhoods,
                                 latitudes=df.Latitude,
                                 longitudes=df.Longitude,
                                 load_existing_pickle_filename = 'toronto_venues.p')

loading venues from previously constructed dataframe in toronto_venues.p


Explore the venues dataframe

In [18]:
print(f"Venues df has the shape {toronto_venues.shape}")
print(f"Venues df has this many venues for each neighborhood:")
toronto_venues.groupby('Neighborhood').count()

Venues df has the shape (2226, 7)
Venues df has this many venues for each neighborhood:


Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, King, Richmond",100,100,100,100,100,100
Agincourt,4,4,4,4,4,4
"Agincourt North, L'Amoreaux East, Milliken, Steeles East",3,3,3,3,3,3
"Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown",10,10,10,10,10,10
"Alderwood, Long Branch",9,9,9,9,9,9
"Bathurst Manor, Downsview North, Wilson Heights",19,19,19,19,19,19
Bayview Village,4,4,4,4,4,4
"Bedford Park, Lawrence Manor East",23,23,23,23,23,23
Berczy Park,57,57,57,57,57,57
"Birch Cliff, Cliffside West",4,4,4,4,4,4


In [19]:
print(f"There are {len(toronto_venues['Venue Category'].unique())} unique categories.")

There are 270 unique categories.


## Prepping for clustering

In [20]:
# onehot
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood']
# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped.shape

(99, 270)

In [21]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide, King, Richmond----
         venue  freq
0  Coffee Shop  0.08
1         Café  0.04
2   Steakhouse  0.04
3          Bar  0.04
4        Hotel  0.03


----Agincourt----
                       venue  freq
0                     Lounge  0.25
1             Breakfast Spot  0.25
2  Latin American Restaurant  0.25
3               Skating Rink  0.25
4              Metro Station  0.00


----Agincourt North, L'Amoreaux East, Milliken, Steeles East----
           venue  freq
0     Playground  0.33
1         Bakery  0.33
2           Park  0.33
3    Yoga Studio  0.00
4  Metro Station  0.00


----Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown----
                  venue  freq
0           Pizza Place   0.2
1         Grocery Store   0.2
2           Video Store   0.1
3        Sandwich Place   0.1
4  Fast Food Restaurant   0.1


----Alderwood, Long Branch----
         venue  freq
0  Pizza Place  0.22
1  Coffee Shop  0.11
2         

Pack in to a Pandas dataframe

In [22]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [23]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Coffee Shop,Café,Bar,Steakhouse,Thai Restaurant,Restaurant,Burger Joint,Hotel,Sushi Restaurant,Asian Restaurant
1,Agincourt,Latin American Restaurant,Lounge,Skating Rink,Breakfast Spot,Women's Store,Dumpling Restaurant,Dog Run,Doner Restaurant,Donut Shop,Drugstore
2,"Agincourt North, L'Amoreaux East, Milliken, St...",Park,Bakery,Playground,Drugstore,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",Grocery Store,Pizza Place,Fried Chicken Joint,Pharmacy,Video Store,Fast Food Restaurant,Beer Store,Sandwich Place,Women's Store,Dog Run
4,"Alderwood, Long Branch",Pizza Place,Coffee Shop,Pool,Gym,Skating Rink,Pharmacy,Pub,Sandwich Place,Dessert Shop,Dim Sum Restaurant


## Cluster Time
instantiate and fit cluster object

In [24]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 1, 4, 0, 0, 4, 0, 0, 4], dtype=int32)

New df for cluster and venues

In [25]:
df.head()

Unnamed: 0,PostalCode,Boroughs,Neighborhoods,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [26]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)
df.rename(columns={'Neighborhoods':'Neighborhood'}, inplace=True)
toronto_merged = df

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

In [27]:
toronto_merged.head(5) # check the last columns!

Unnamed: 0,PostalCode,Boroughs,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,4.0,Fast Food Restaurant,Drugstore,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Women's Store,Dessert Shop
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,4.0,Moving Target,Bar,History Museum,Women's Store,Drugstore,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,0.0,Mexican Restaurant,Medical Center,Electronics Store,Spa,Breakfast Spot,Pizza Place,Rental Car Location,Intersection,Moving Target,Diner
3,M1G,Scarborough,Woburn,43.770992,-79.216917,0.0,Coffee Shop,Pharmacy,Korean Restaurant,Drugstore,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Eastern European Restaurant
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,4.0,Hakka Restaurant,Gas Station,Bank,Fried Chicken Joint,Caribbean Restaurant,Athletics & Sports,Thai Restaurant,Bakery,Dumpling Restaurant,Drugstore


Final cleaning

In [28]:
print(toronto_merged.shape)
toronto_final = toronto_merged.dropna()
print(toronto_final.shape)

(103, 16)
(100, 16)


# Visualization of Clusterned Neighborhoods

In [30]:
# create map
map_clusters = folium.Map(location=[43.6532, -79.3832], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_final['Latitude'], toronto_final['Longitude'], toronto_final['Neighborhood'], toronto_final['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)-1],
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

![Map](toronto_cluster_map.jpg)

# Specific inquries
how many are in each label?

In [31]:
toronto_final['Cluster Labels'].value_counts()

0.0    56
4.0    27
1.0    13
3.0     2
2.0     2
Name: Cluster Labels, dtype: int64

group 2 is small, what's in there?

In [32]:
toronto_final.loc[toronto_final['Cluster Labels'] == 2].head()

Unnamed: 0,PostalCode,Boroughs,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476,2.0,Playground,Women's Store,Dumpling Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Eastern European Restaurant
48,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316,2.0,Gym,Tennis Court,Playground,Women's Store,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant


group 3 is small, whats in there?

In [34]:
toronto_final.loc[toronto_final['Cluster Labels'] == 3].head()

Unnamed: 0,PostalCode,Boroughs,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
91,M8Y,Etobicoke,"Humber Bay, King's Mill Park, Kingsway Park So...",43.636258,-79.498509,3.0,Baseball Field,Women's Store,Dumpling Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Eastern European Restaurant,Festival
97,M9M,North York,"Emery, Humberlea",43.724766,-79.532242,3.0,Paper / Office Supplies Store,Baseball Field,Women's Store,Dumpling Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Eastern European Restaurant


In [35]:
toronto_final.loc[toronto_final['Cluster Labels'] == 0].head()

Unnamed: 0,PostalCode,Boroughs,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,0.0,Mexican Restaurant,Medical Center,Electronics Store,Spa,Breakfast Spot,Pizza Place,Rental Car Location,Intersection,Moving Target,Diner
3,M1G,Scarborough,Woburn,43.770992,-79.216917,0.0,Coffee Shop,Pharmacy,Korean Restaurant,Drugstore,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Eastern European Restaurant
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029,0.0,Discount Store,Department Store,Coffee Shop,Convenience Store,Chinese Restaurant,Dumpling Restaurant,Dog Run,Doner Restaurant,Donut Shop,Drugstore
11,M1R,Scarborough,"Maryvale, Wexford",43.750072,-79.295849,0.0,Smoke Shop,Sandwich Place,Auto Garage,Breakfast Spot,Shopping Mall,Bakery,Dog Run,Doner Restaurant,Donut Shop,Drugstore
12,M1S,Scarborough,Agincourt,43.7942,-79.262029,0.0,Latin American Restaurant,Lounge,Skating Rink,Breakfast Spot,Women's Store,Dumpling Restaurant,Dog Run,Doner Restaurant,Donut Shop,Drugstore


In [36]:
toronto_final.loc[toronto_final['Cluster Labels'] == 1].head()

Unnamed: 0,PostalCode,Boroughs,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
14,M1V,Scarborough,"Agincourt North, L'Amoreaux East, Milliken, St...",43.815252,-79.284577,1.0,Park,Bakery,Playground,Drugstore,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant
23,M2P,North York,York Mills West,43.752758,-79.400049,1.0,Park,Bank,Convenience Store,Women's Store,Dumpling Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore
25,M3A,North York,Parkwoods,43.753259,-79.329656,1.0,Park,Food & Drink Shop,Women's Store,Dumpling Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Eastern European Restaurant
30,M3K,North York,"CFB Toronto, Downsview East",43.737473,-79.464763,1.0,Park,Airport,Women's Store,Dumpling Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore
40,M4J,East York,East Toronto,43.685347,-79.338106,1.0,Park,Coffee Shop,Convenience Store,Women's Store,Dumpling Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore


In [37]:
toronto_final.loc[toronto_final['Cluster Labels'] == 4].head()

Unnamed: 0,PostalCode,Boroughs,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,4.0,Fast Food Restaurant,Drugstore,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Women's Store,Dessert Shop
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,4.0,Moving Target,Bar,History Museum,Women's Store,Drugstore,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,4.0,Hakka Restaurant,Gas Station,Bank,Fried Chicken Joint,Caribbean Restaurant,Athletics & Sports,Thai Restaurant,Bakery,Dumpling Restaurant,Drugstore
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577,4.0,Bakery,Bus Line,Fast Food Restaurant,Park,Intersection,Metro Station,Soccer Field,Cosmetics Shop,Construction & Landscaping,Comfort Food Restaurant
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476,4.0,American Restaurant,Motel,Dessert Shop,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Women's Store


# Key Take Aways

1. Clusters "2" and "3" have very few items compared to the others. The top venues in these groups seem sports related. Perhaps their location is what is separating them. May want to manually merge these groups.
1. The Map shows that the larger clusters (0,1 and 4) are mostly arranged at differeent radii around the downtown, where 0 is most downtown, 1 is a ring round 0 and 4 is a ring around 1. Interestingly, 0 and 4 all have mostly restaruants, with department and convenience stores in there 2nd place. However, 1 has Parks in its top place, leading to the conclusion that cluster 1 represents a ring of urban park areas surounding downtown, and surrounded by retail.