Install packages if needed

In [2]:
#Uncomment these if needed
# These take a while to run
#Run these then comment and run the cell again to clear output
#!conda install -c conda-forge wikipedia --yes
#!conda install -c conda-forge pandasql --yes
#!conda install -c conda-forge folium=0.5.0 --yes 
#!conda install -c conda-forge geopy --yes #

##### Get Latitude / Longitude data - short list of 10 postal codes

In [3]:

# Import Packages
import os

import numpy as np
import pandas as pd
import pandasql as pdsql
import wikipedia as wp
def pysql(q):
    """Run the SQL query `q` through pandasql, resolving table names against this module's globals."""
    return pdsql.sqldf(q, globals())
# Latitude / longitude lookup table, read straight from the CSV URL
df_geo = pd.read_csv('http://cocl.us/Geospatial_data')

# Postal code table: fetch the Wikipedia page's HTML and take the first table on it
html = wp.page("List of postal codes of Canada: M").html().encode("UTF-8")
df = pd.read_html(html)[0]

# Discard rows whose Borough is 'Not assigned'
df = pysql("select * from df where borough!='Not assigned'")

# A 'Not assigned' Neighbourhood falls back to the Borough name;
# rows come back ordered by Postcode, then Borough
df = pysql(
    "select Postcode, Borough, "
    "CASE when Neighbourhood='Not assigned' then Borough else Neighbourhood END as Neighbourhood "
    "from df order by Postcode, Borough"
)
# Collapse the one-row-per-neighbourhood table into one row per (Postcode, Borough),
# joining the distinct neighbourhood names with commas in their existing row order.
# This replaces the previous adjacent-row merge, whose index started at -1 (so the
# first row was compared against the last row) and which needed a follow-up
# "longest string" SQL query to discard partially merged rows.
df = (
    df.groupby(['Postcode', 'Borough'], sort=False)['Neighbourhood']
      .agg(lambda names: ','.join(pd.unique(names)))
      .reset_index()
)
# Attach coordinates to every postal code (inner join on the postal code),
# then preview the first ten rows to help pick areas to explore
df_combined = pysql(
    'select df.Postcode, df.Borough, df.Neighbourhood, df_geo.Latitude, df_geo.Longitude '
    'from df INNER JOIN df_geo on df.Postcode=df_geo."Postal Code"'
)
df_combined.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park,Ionview,Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea,Golden Mile,Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest,Cliffside,Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff,Cliffside West",43.692657,-79.264848


##### Additional Imports

In [4]:
# Lift pandas' display limits on both columns and rows
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, None)
import json # library to handle JSON files


from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans


import folium # map rendering library

print('Libraries imported.')

Libraries imported.


### Let's start with a map of Toronto with neighborhoods superimposed over it

#### Use geopy library to get the latitude and longitude values of Toronto

In [6]:
# Resolve the city name to coordinates with the Nominatim geocoder
address = 'Toronto, ON'
geolocator = Nominatim(user_agent="my_explorer")
location = geolocator.geocode(address)

latitude, longitude = location.latitude, location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


##### Create map of Toronto with neighborhoods superimposed on top

In [8]:
# Base map centred on the geocoded Toronto coordinates
map_Toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# One circle marker per row of df_combined, with a "neighbourhood, borough" popup
marker_fields = df_combined[['Latitude', 'Longitude', 'Borough', 'Neighbourhood']]
for lat, lng, borough, neighborhood in marker_fields.itertuples(index=False, name=None):
    label = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=10,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_Toronto)

map_Toronto

### Now let's look at some Foursquare stuff...

In [9]:
# Foursquare credentials are read from environment variables so that secrets are
# never stored in the notebook source or echoed into its saved outputs.
# Set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before starting the kernel.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

# Report only whether the credentials are present, never their values
if CLIENT_ID and CLIENT_SECRET:
    print('Foursquare credentials loaded from the environment.')
else:
    print('WARNING: FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; Foursquare requests will fail.')

Your credentails:
CLIENT_ID: <redacted>
CLIENT_SECRET:<redacted>


#### Explore the first Borough

In [10]:
# Latitude, longitude and borough name of the first row (index label 0)
Lat = df_combined.at[0, 'Latitude']
Long = df_combined.at[0, 'Longitude']
Borough = df_combined.at[0, 'Borough']

print("We will explore the Borough of {} Latitude: {} Longitude: {}".format(Borough, Lat, Long))

We will explore the Borough of Scarborough Latitude: 43.806686299999996 Longitude: -79.19435340000001


##### Let's look at the top 50 venues in a 250 meter radius of Scarborough

In [11]:
# Request at most LIMIT venues within `radius` metres of (Lat, Long)
LIMIT = 50
radius = 250

url = (
    'https://api.foursquare.com/v2/venues/explore'
    '?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'
).format(CLIENT_ID, CLIENT_SECRET, Lat, Long, VERSION, radius, LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?client_id=<redacted>&client_secret=<redacted>&ll=43.806686299999996,-79.19435340000001&v=20180605&radius=250&limit=50'

Examine the JSON results

In [12]:
# Fetch the explore results. A timeout stops a stalled connection from hanging the
# kernel, and raise_for_status() surfaces HTTP errors here instead of as a
# confusing KeyError in a later cell.
response = requests.get(url, timeout=30)
response.raise_for_status()
results = response.json()
results

{'meta': {'code': 200, 'requestId': '5d5aabbca6ec98002c1166a7'},
  'headerLocation': 'Malvern',
  'headerFullLocation': 'Malvern, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 0,
  'suggestedBounds': {'ne': {'lat': 43.80893630225, 'lng': -79.19124149036521},
   'sw': {'lat': 43.804436297749994, 'lng': -79.19746530963481}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': []}]}}

Let's look at the data and try to find locations...

In [13]:
#Commented out to save output space
#df_combined

Let's try Downtown Toronto

# Latitude, longitude and borough name of the row with index label 51
Lat = df_combined.at[51, 'Latitude']
Long = df_combined.at[51, 'Longitude']
Borough = df_combined.at[51, 'Borough']

print("We will explore the Borough of {} Latitude: {} Longitude: {}".format(Borough, Lat, Long))


In [28]:
# Same explore request as before, now for the newly selected (Lat, Long)
LIMIT = 50
radius = 250

url = (
    'https://api.foursquare.com/v2/venues/explore'
    '?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'
).format(CLIENT_ID, CLIENT_SECRET, Lat, Long, VERSION, radius, LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?client_id=<redacted>&client_secret=<redacted>&ll=43.667967,-79.3676753&v=20180605&radius=250&limit=50'

##### And let's see if we have results...

In [15]:
# Fetch the explore results. A timeout stops a stalled connection from hanging the
# kernel, and raise_for_status() surfaces HTTP errors here instead of as a
# confusing KeyError in a later cell.
response = requests.get(url, timeout=30)
response.raise_for_status()
results = response.json()
results

{'meta': {'code': 200, 'requestId': '5d5aac577ca06c00230a4c53'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Cabbagetown',
  'headerFullLocation': 'Cabbagetown, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 19,
  'suggestedBounds': {'ne': {'lat': 43.67021700225, 'lng': -79.36457059132985},
   'sw': {'lat': 43.665716997749996, 'lng': -79.37078000867015}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4b646a6ff964a5205cb12ae3',
       'name': 'Cranberries',
       'location': {'address': '601 Parliament St.',
        'crossStreet': 'at Wellesley St. E',
        'lat': 43.6678427705951,
        'lng': -79.36940687874281,
        'labeledLatLngs': [{'

Let's borrow this function to get venue category

In [16]:
def get_category_type(row):
    """Return the name of a venue's first category, or None if it has none.

    Parameters
    ----------
    row : mapping-like (e.g. a dict or a pandas Series)
        Must provide a list of category dicts under the key 'categories' or,
        failing that, under 'venue.categories'.

    Returns
    -------
    The 'name' entry of the first category, or None when the list is empty.

    Raises
    ------
    KeyError
        If neither 'categories' nor 'venue.categories' is present.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error propagates
        # instead of being swallowed by a bare `except`.
        categories_list = row['venue.categories']

    if len(categories_list) == 0:
        return None
    return categories_list[0]['name']

Make the results a dataframe

In [17]:
# Venue items of the first group in the API response
venues = results['response']['groups'][0]['items']

# Flatten the nested JSON and keep only name, categories and coordinates
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
flattened = json_normalize(venues)
nearby_venues = flattened.loc[:, filtered_columns]

# Replace each row's category list with the name of its first category
first_category = nearby_venues.apply(get_category_type, axis=1)
nearby_venues['venue.categories'] = first_category

# Keep only the last dotted component of each column name
nearby_venues = nearby_venues.rename(columns=lambda col: col.split(".")[-1])

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Cranberries,Diner,43.667843,-79.369407
1,F'Amelia,Italian Restaurant,43.667536,-79.368613
2,Butter Chicken Factory,Indian Restaurant,43.667072,-79.369184
3,Kingyo Toronto,Japanese Restaurant,43.665895,-79.368415
4,Murgatroid,Restaurant,43.667381,-79.369311


##### How many venues did we get?

In [18]:
# Number of rows in the venues table
venue_count = len(nearby_venues)
print('{} venues were returned by Foursquare.'.format(venue_count))

19 venues were returned by Foursquare.


## Analyze the neighborhood

Let's see these places on a map

In [19]:
# Map of the explored area. Centre it on the coordinates that were sent to
# Foursquare (Lat, Long) rather than on the city centre (latitude, longitude):
# at zoom level 15 the city centre is far enough away that the venue markers
# would fall outside the initial view.
vmap_Toronto = folium.Map(location=[Lat, Long], zoom_start=15)

# One circle marker per venue, with a "name, category" popup
for lat, lng, name, categories in zip(nearby_venues['lat'], nearby_venues['lng'],nearby_venues['name'],nearby_venues['categories']):
    label = '{}, {}'.format(name, categories)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(vmap_Toronto)

vmap_Toronto

### Combine all the neighborhoods - see where we have favorites

###### Check that our variables are set for Toronto:  The geograpical coordinate of Toronto are 43.653963, -79.387207.

In [27]:
# Show the city-level coordinates, one per line
print(latitude, longitude, sep='\n')

43.653963
-79.387207


#### Start with the get_nearby_venues function ("borrowed" from the NYC Example)

In [35]:
def getNearbyVenues(names, latitudes, longitudes, radius=250, limit=None):
    """Query the Foursquare explore endpoint around each location and tabulate the venues.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        One label and one coordinate pair per location; they are consumed in
        parallel with zip().
    radius : int, default 250
        Value sent as the request's `radius` parameter.
    limit : int, optional
        Value sent as the request's `limit` parameter. When omitted, the
        notebook-level LIMIT is used, as before.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Borough',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'. The frame has
        these columns even when no venue was returned.

    Raises
    ------
    requests.HTTPError
        If a request comes back with an HTTP error status.
    """
    if limit is None:
        limit = LIMIT  # fall back to the notebook-level default

    column_names = ['Borough',
                    'Neighborhood Latitude',
                    'Neighborhood Longitude',
                    'Venue',
                    'Venue Latitude',
                    'Venue Longitude',
                    'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID,
            CLIENT_SECRET,
            VERSION,
            lat,
            lng,
            radius,
            limit)

        # make the GET request; bound the wait and surface HTTP errors explicitly
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        items = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            # a venue without categories gets None instead of raising IndexError
            categories = venue.get('categories') or []
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # passing `columns=` keeps the schema intact even when `rows` is empty
    return pd.DataFrame(rows, columns=column_names)

##### Run the function and create a new dataframe

In [37]:
# Label every location with its Borough (neighbourhoods are already combined into a list per row)
toronto_venues = getNearbyVenues(
    df_combined['Borough'],
    df_combined['Latitude'],
    df_combined['Longitude'],
)


#### Let's look at our dataset:

In [39]:
# (rows, columns) of the venues table, followed by its first five rows
venues_shape = toronto_venues.shape
print(venues_shape)
toronto_venues.head(5)

(835, 7)


Unnamed: 0,Borough,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Scarborough,43.770992,-79.216917,Korean Grill House,43.770812,-79.214502,Korean Restaurant
1,Scarborough,43.773136,-79.239476,Federick Restaurant,43.774697,-79.241142,Hakka Restaurant
2,Scarborough,43.773136,-79.239476,Thai One On,43.774468,-79.241268,Thai Restaurant
3,Scarborough,43.744734,-79.239476,McCowan Park,43.745089,-79.239336,Playground
4,Scarborough,43.716316,-79.239476,Have A Nap Motel,43.718256,-79.240135,Motel


##### Let's see what we have by Borough

In [40]:
# Count the non-null entries of every column within each Borough
toronto_venues.groupby('Borough').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Borough,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Central Toronto,28,28,28,28,28,28
Downtown Toronto,496,496,496,496,496,496
East Toronto,51,51,51,51,51,51
East York,35,35,35,35,35,35
Etobicoke,18,18,18,18,18,18
Mississauga,1,1,1,1,1,1
North York,104,104,104,104,104,104
Queen's Park,4,4,4,4,4,4
Scarborough,21,21,21,21,21,21
West Toronto,69,69,69,69,69,69


##### And the number of unique venue categories across all Boroughs

In [41]:
# Number of distinct values in the 'Venue Category' column (a missing value counts once)
category_count = toronto_venues['Venue Category'].nunique(dropna=False)
print('There are {} uniques categories.'.format(category_count))

There are 192 uniques categories.


### Analyze each Borough

In [48]:
# One indicator column per venue category
category_dummies = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# Append the Borough of each venue as the last column ...
category_dummies['Borough'] = toronto_venues['Borough']

# ... then rotate that last column to the front
column_order = category_dummies.columns.tolist()
toronto_onehot = category_dummies[column_order[-1:] + column_order[:-1]]

toronto_onehot.head()


Unnamed: 0,Borough,Adult Boutique,Airport Lounge,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Auto Workshop,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Basketball Court,Beer Bar,Beer Store,Belgian Restaurant,Bike Shop,Bistro,Boat or Ferry,Bookstore,Boutique,Breakfast Spot,Brewery,Bubble Tea Shop,Building,Burger Joint,Burrito Place,Bus Stop,Café,Camera Store,Candy Store,Caribbean Restaurant,Cheese Shop,Chinese Restaurant,Church,Clothing Store,Cocktail Bar,Coffee Shop,College Gym,College Rec Center,Colombian Restaurant,Comfort Food Restaurant,Comic Shop,Concert Hall,Convenience Store,Cosmetics Shop,Creperie,Cuban Restaurant,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dive Bar,Dog Run,Donut Shop,Dumpling Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Festival,Field,Film Studio,Fish & Chips Shop,Fish Market,Flower Shop,Food,Food Court,Food Truck,Fountain,French Restaurant,Fried Chicken Joint,Fruit & Vegetable Store,Furniture / Home Store,Garden,Gastropub,Gay Bar,General Entertainment,General Travel,Gluten-free Restaurant,Golf Course,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Hakka Restaurant,Hardware Store,Health & Beauty Service,History Museum,Hobby Shop,Home Service,Hostel,Hotel,Hotel Bar,Hotpot Restaurant,Ice Cream Shop,Indian Restaurant,Intersection,Irish Pub,Italian Restaurant,Japanese Restaurant,Jazz Club,Juice Bar,Kids Store,Korean Restaurant,Lake,Latin American Restaurant,Light Rail Station,Liquor Store,Lounge,Luggage Store,Mac & Cheese Joint,Malay Restaurant,Martial Arts Dojo,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Moroccan Restaurant,Motel,Movie Theater,Museum,Music Venue,Neighborhood,New American Restaurant,Noodle House,Opera House,Organic Grocery,Other Great Outdoors,Park,Performing Arts Venue,Pet Store,Pharmacy,Pizza 
Place,Playground,Plaza,Poke Place,Polish Restaurant,Pool,Portuguese Restaurant,Pub,Ramen Restaurant,Record Shop,Restaurant,Roof Deck,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,Sculpture Garden,Seafood Restaurant,Shopping Mall,Shopping Plaza,Smoke Shop,Smoothie Shop,Snack Place,Social Club,Soup Place,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Steakhouse,Supermarket,Sushi Restaurant,Taco Place,Tailor Shop,Tea Room,Tennis Court,Thai Restaurant,Theater,Theme Restaurant,Thrift / Vintage Store,Toy / Game Store,Trail,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Scarborough,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Scarborough,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Scarborough,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Scarborough,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Scarborough,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


#### Get the size of the dataset

In [49]:
# (rows, columns) of the one-hot encoded table
toronto_onehot.shape

(835, 193)

#### Group rows by Borough, taking the mean frequency of occurrence of each category:

In [51]:
# Mean of every indicator column per Borough, with Borough kept as an ordinary column
toronto_grouped = toronto_onehot.groupby('Borough', as_index=False).mean()
toronto_grouped

Unnamed: 0,Borough,Adult Boutique,Airport Lounge,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Auto Workshop,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Basketball Court,Beer Bar,Beer Store,Belgian Restaurant,Bike Shop,Bistro,Boat or Ferry,Bookstore,Boutique,Breakfast Spot,Brewery,Bubble Tea Shop,Building,Burger Joint,Burrito Place,Bus Stop,Café,Camera Store,Candy Store,Caribbean Restaurant,Cheese Shop,Chinese Restaurant,Church,Clothing Store,Cocktail Bar,Coffee Shop,College Gym,College Rec Center,Colombian Restaurant,Comfort Food Restaurant,Comic Shop,Concert Hall,Convenience Store,Cosmetics Shop,Creperie,Cuban Restaurant,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dive Bar,Dog Run,Donut Shop,Dumpling Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Festival,Field,Film Studio,Fish & Chips Shop,Fish Market,Flower Shop,Food,Food Court,Food Truck,Fountain,French Restaurant,Fried Chicken Joint,Fruit & Vegetable Store,Furniture / Home Store,Garden,Gastropub,Gay Bar,General Entertainment,General Travel,Gluten-free Restaurant,Golf Course,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Hakka Restaurant,Hardware Store,Health & Beauty Service,History Museum,Hobby Shop,Home Service,Hostel,Hotel,Hotel Bar,Hotpot Restaurant,Ice Cream Shop,Indian Restaurant,Intersection,Irish Pub,Italian Restaurant,Japanese Restaurant,Jazz Club,Juice Bar,Kids Store,Korean Restaurant,Lake,Latin American Restaurant,Light Rail Station,Liquor Store,Lounge,Luggage Store,Mac & Cheese Joint,Malay Restaurant,Martial Arts Dojo,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Moroccan Restaurant,Motel,Movie Theater,Museum,Music Venue,Neighborhood,New American Restaurant,Noodle House,Opera House,Organic Grocery,Other Great Outdoors,Park,Performing Arts Venue,Pet Store,Pharmacy,Pizza 
Place,Playground,Plaza,Poke Place,Polish Restaurant,Pool,Portuguese Restaurant,Pub,Ramen Restaurant,Record Shop,Restaurant,Roof Deck,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,Sculpture Garden,Seafood Restaurant,Shopping Mall,Shopping Plaza,Smoke Shop,Smoothie Shop,Snack Place,Social Club,Soup Place,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Steakhouse,Supermarket,Sushi Restaurant,Taco Place,Tailor Shop,Tea Room,Tennis Court,Thai Restaurant,Theater,Theme Restaurant,Thrift / Vintage Store,Toy / Game Store,Trail,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Central Toronto,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.035714,0.0,0.0,0.035714,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.035714,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Downtown Toronto,0.002016,0.002016,0.006048,0.010081,0.002016,0.004032,0.0,0.004032,0.0,0.020161,0.002016,0.022177,0.0,0.0,0.012097,0.002016,0.004032,0.0,0.004032,0.002016,0.014113,0.0,0.012097,0.0,0.004032,0.008065,0.014113,0.002016,0.0,0.058468,0.002016,0.0,0.004032,0.002016,0.008065,0.002016,0.004032,0.012097,0.122984,0.002016,0.002016,0.002016,0.004032,0.002016,0.004032,0.0,0.002016,0.002016,0.0,0.004032,0.002016,0.024194,0.002016,0.004032,0.0,0.008065,0.002016,0.002016,0.0,0.002016,0.004032,0.0,0.004032,0.0,0.0,0.002016,0.006048,0.002016,0.0,0.0,0.0,0.002016,0.004032,0.0,0.002016,0.004032,0.002016,0.002016,0.002016,0.0,0.0,0.002016,0.020161,0.0,0.004032,0.004032,0.006048,0.0,0.002016,0.004032,0.006048,0.016129,0.014113,0.0,0.0,0.0,0.002016,0.0,0.0,0.002016,0.020161,0.002016,0.002016,0.004032,0.002016,0.0,0.002016,0.028226,0.014113,0.002016,0.0,0.0,0.002016,0.002016,0.002016,0.0,0.008065,0.002016,0.0,0.0,0.0,0.004032,0.002016,0.014113,0.008065,0.004032,0.0,0.002016,0.004032,0.002016,0.002016,0.002016,0.004032,0.002016,0.002016,0.002016,0.012097,0.002016,0.0,0.008065,0.018145,0.0,0.006048,0.002016,0.002016,0.0,0.0,0.012097,0.006048,0.0,0.032258,0.0,0.012097,0.002016,0.022177,0.0,0.0,0.012097,0.004032,0.0,0.002016,0.002016,0.002016,0.0,0.006048,0.004032,0.002016,0.006048,0.006048,0.014113,0.004032,0.010081,0.004032,0.002016,0.012097,0.0,0.018145,0.008065,0.002016,0.002016,0.0,0.0,0.012097,0.002016,0.0,0.010081,0.0,0.006048,0.002016,0.002016,0.0,0.0
2,East Toronto,0.0,0.0,0.019608,0.0,0.0,0.0,0.019608,0.0,0.0,0.019608,0.019608,0.019608,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.019608,0.0,0.0,0.039216,0.019608,0.0,0.019608,0.0,0.0,0.039216,0.0,0.0,0.0,0.019608,0.019608,0.0,0.019608,0.0,0.039216,0.0,0.0,0.0,0.019608,0.0,0.0,0.019608,0.019608,0.0,0.0,0.0,0.0,0.0,0.0,0.019608,0.0,0.039216,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.019608,0.019608,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.019608,0.0,0.019608,0.019608,0.019608,0.0,0.0,0.0,0.0,0.0,0.098039,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.039216,0.0,0.0,0.019608,0.0,0.0,0.0,0.019608,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.019608,0.039216,0.0,0.019608,0.0,0.0,0.019608,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.019608,0.0,0.0,0.019608,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.019608,0.0,0.0,0.0,0.0,0.019608,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.019608
3,East York,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.057143,0.0,0.0,0.0,0.057143,0.0,0.057143,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.057143,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.057143,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.028571,0.057143,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.057143,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.028571,0.0,0.028571,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.028571
4,Etobicoke,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.111111,0.166667,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0
5,Mississauga,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,North York,0.0,0.0,0.0,0.0,0.0,0.028846,0.0,0.0,0.0,0.019231,0.019231,0.0,0.009615,0.0,0.0,0.009615,0.0,0.009615,0.0,0.0,0.0,0.019231,0.0,0.0,0.0,0.0,0.009615,0.009615,0.0,0.0,0.0,0.009615,0.0,0.0,0.009615,0.0,0.086538,0.0,0.086538,0.0,0.0,0.0,0.009615,0.0,0.0,0.009615,0.009615,0.0,0.0,0.0,0.0,0.019231,0.0,0.0,0.009615,0.0,0.009615,0.0,0.0,0.0,0.0,0.019231,0.0,0.009615,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.009615,0.009615,0.0,0.009615,0.0,0.0,0.0,0.0,0.0,0.0,0.009615,0.0,0.0,0.0,0.009615,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.009615,0.0,0.028846,0.019231,0.0,0.019231,0.009615,0.0,0.0,0.0,0.0,0.019231,0.0,0.009615,0.0,0.0,0.0,0.009615,0.0,0.009615,0.0,0.0,0.009615,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.019231,0.0,0.009615,0.0,0.028846,0.009615,0.0,0.0,0.0,0.019231,0.009615,0.009615,0.0,0.0,0.038462,0.0,0.0,0.0,0.019231,0.0,0.0,0.0,0.009615,0.009615,0.0,0.009615,0.0,0.0,0.0,0.0,0.0,0.009615,0.0,0.0,0.0,0.038462,0.0,0.0,0.019231,0.009615,0.009615,0.009615,0.0,0.0,0.009615,0.0,0.0,0.0,0.009615,0.0,0.0,0.0,0.0,0.019231,0.019231,0.0
7,Queen's Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Scarborough,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.095238,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.095238,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.095238,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.095238,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,West Toronto,0.0,0.0,0.0,0.014493,0.0,0.028986,0.0,0.0,0.014493,0.028986,0.014493,0.072464,0.0,0.0,0.0,0.0,0.0,0.0,0.014493,0.0,0.0,0.014493,0.0,0.014493,0.0,0.0,0.0,0.014493,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014493,0.057971,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014493,0.0,0.0,0.014493,0.0,0.0,0.0,0.028986,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028986,0.0,0.0,0.0,0.0,0.0,0.014493,0.0,0.0,0.014493,0.0,0.0,0.0,0.014493,0.0,0.0,0.014493,0.014493,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014493,0.0,0.0,0.028986,0.0,0.0,0.0,0.0,0.014493,0.0,0.0,0.0,0.0,0.0,0.014493,0.0,0.0,0.0,0.0,0.028986,0.0,0.0,0.0,0.014493,0.0,0.0,0.014493,0.0,0.0,0.0,0.014493,0.014493,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014493,0.0,0.028986,0.0,0.0,0.0,0.0,0.014493,0.0,0.0,0.014493,0.028986,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014493,0.014493,0.0,0.0,0.014493,0.014493,0.014493,0.0,0.0,0.0,0.0,0.0,0.014493,0.0,0.0,0.0,0.0,0.0,0.014493,0.0,0.0,0.014493,0.028986,0.0,0.0,0.0,0.0,0.0,0.014493,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028986,0.0,0.028986,0.0,0.0,0.0,0.014493


#### New size of dataset

In [53]:
# (rows, columns) of the per-Borough table
toronto_grouped.shape

(11, 193)

### Let's print the top 5 most common venues by borough

In [54]:
num_top_venues = 5

# For each Borough row, print its category frequencies (rounded to two decimals),
# highest first, limited to the top `num_top_venues`
for _, grouped_row in toronto_grouped.iterrows():
    hood = grouped_row['Borough']
    print("----"+hood+"----")
    # everything after the first column is a category frequency
    freqs = grouped_row.iloc[1:].astype(float).round(2)
    freq_table = freqs.rename_axis('venue').reset_index(name='freq')
    ranked = freq_table.sort_values('freq', ascending=False).reset_index(drop=True)
    print(ranked.head(num_top_venues))
    print('\n')

----Central Toronto----
                venue  freq
0         Coffee Shop  0.14
1         Pizza Place  0.07
2        Dessert Shop  0.07
3                Café  0.07
4  Italian Restaurant  0.07


----Downtown Toronto----
                venue  freq
0         Coffee Shop  0.12
1                Café  0.06
2  Italian Restaurant  0.03
3          Restaurant  0.03
4       Deli / Bodega  0.02


----East Toronto----
                venue  freq
0    Greek Restaurant  0.10
1      Ice Cream Shop  0.06
2               Diner  0.04
3  Italian Restaurant  0.04
4         Coffee Shop  0.04


----East York----
               venue  freq
0  Indian Restaurant  0.06
1           Pharmacy  0.06
2       Burger Joint  0.06
3           Bus Stop  0.06
4        Coffee Shop  0.06


----Etobicoke----
         venue  freq
0  Coffee Shop  0.17
1  Pizza Place  0.17
2     Pharmacy  0.11
3   Beer Store  0.06
4         Pool  0.06


----Mississauga----
                       venue  freq
0                Coffee Shop   1.0
1 

##### Note:  There's not much in Mississauga :)

### Let's sort the values and put them in a dataframe

In [56]:
def return_most_common_venues(row, num_top_venues):
    """Return the index labels of the largest entries in ``row``.

    The first element of ``row`` is skipped, the remaining entries are sorted
    from largest to smallest, and the index labels of the first
    ``num_top_venues`` entries are returned as an array. If fewer entries
    remain than requested, all of them are returned.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

### Get the top 5 venues by Borough

In [65]:
num_top_venues = 5

# Ordinal suffixes for ranks 1-3; every later rank falls back to 'th'
indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Borough']
for rank in range(1, num_top_venues + 1):
    # Explicit bounds check instead of a bare `except:` around indicators[...],
    # which would also have hidden unrelated errors.
    suffix = indicators[rank - 1] if rank <= len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(rank, suffix))

# Rank the venue categories of every row of toronto_grouped
top_venues = [
    return_most_common_venues(toronto_grouped.iloc[pos, :], num_top_venues)
    for pos in range(toronto_grouped.shape[0])
]

# create a new dataframe in one step (rather than assigning row by row),
# sharing toronto_grouped's index so the 'Borough' column lines up
neighborhoods_venues_sorted = pd.DataFrame(
    top_venues, columns=columns[1:], index=toronto_grouped.index
)
neighborhoods_venues_sorted.insert(0, 'Borough', toronto_grouped['Borough'])

neighborhoods_venues_sorted.head()

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Central Toronto,Coffee Shop,Pizza Place,Italian Restaurant,Café,Dessert Shop
1,Downtown Toronto,Coffee Shop,Café,Restaurant,Italian Restaurant,Deli / Bodega
2,East Toronto,Greek Restaurant,Ice Cream Shop,Café,Brewery,Italian Restaurant
3,East York,Pharmacy,Breakfast Spot,Burger Joint,Bus Stop,Indian Restaurant
4,Etobicoke,Pizza Place,Coffee Shop,Pharmacy,Liquor Store,Pool


## Cluster Neighborhoods:

#### K-means to create 5 clusters

In [66]:
# set number of clusters
kclusters = 5

# Feature matrix for clustering: everything except the 'Borough' label column.
# `columns=` is used because passing the axis positionally (drop('Borough', 1))
# is rejected by pandas 2.x.
toronto_grouped_clustering = toronto_grouped.drop(columns='Borough')

# run k-means clustering
# n_init is pinned to 10 (the long-standing default) because newer
# scikit-learn releases changed the default to 'auto'.
kmeans = KMeans(n_clusters=kclusters, random_state=0, n_init=10).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 0, 0, 0, 1, 3, 0, 2, 0, 0], dtype=int32)

#### Create a dataframe to combine clusters and top venues for each neighborhood

In [67]:
# add clustering labels
# Drop any 'Cluster Labels' column left by an earlier run of this cell first:
# DataFrame.insert raises ValueError when the column already exists, which
# made the cell fail on re-execution.
neighborhoods_venues_sorted = neighborhoods_venues_sorted.drop(columns='Cluster Labels', errors='ignore')
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# join the per-borough cluster label and top venues onto every row of
# df_combined (which carries Latitude/Longitude), matching on 'Borough'
toronto_merged = df_combined.join(neighborhoods_venues_sorted.set_index('Borough'), on='Borough')

toronto_merged.head() # check the last columns!

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353,0,Fast Food Restaurant,Sandwich Place,Chinese Restaurant,Pizza Place,Thrift / Vintage Store
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497,0,Fast Food Restaurant,Sandwich Place,Chinese Restaurant,Pizza Place,Thrift / Vintage Store
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711,0,Fast Food Restaurant,Sandwich Place,Chinese Restaurant,Pizza Place,Thrift / Vintage Store
3,M1G,Scarborough,Woburn,43.770992,-79.216917,0,Fast Food Restaurant,Sandwich Place,Chinese Restaurant,Pizza Place,Thrift / Vintage Store
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,0,Fast Food Restaurant,Sandwich Place,Chinese Restaurant,Pizza Place,Thrift / Vintage Store


### Let's end with a map of the clusters

In [68]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow color per cluster
# (the former x/ys helper arrays were only used for their length, kclusters)
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Borough'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # Cluster labels start at 0, so they index the palette directly; the
    # previous `cluster-1` wrapped cluster 0 onto the last color via rainbow[-1].
    marker_color = rainbow[int(cluster)]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

## Examine Clusters

#### Let's take a look at each cluster

In [69]:
# Rows assigned to cluster 0: keep column 1 (Borough) and columns 5 onward
# (the cluster label followed by the ranked venue columns).
toronto_merged[toronto_merged['Cluster Labels'] == 0].iloc[
    :, [1, *range(5, toronto_merged.shape[1])]
]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Scarborough,0,Fast Food Restaurant,Sandwich Place,Chinese Restaurant,Pizza Place,Thrift / Vintage Store
1,Scarborough,0,Fast Food Restaurant,Sandwich Place,Chinese Restaurant,Pizza Place,Thrift / Vintage Store
2,Scarborough,0,Fast Food Restaurant,Sandwich Place,Chinese Restaurant,Pizza Place,Thrift / Vintage Store
3,Scarborough,0,Fast Food Restaurant,Sandwich Place,Chinese Restaurant,Pizza Place,Thrift / Vintage Store
4,Scarborough,0,Fast Food Restaurant,Sandwich Place,Chinese Restaurant,Pizza Place,Thrift / Vintage Store
5,Scarborough,0,Fast Food Restaurant,Sandwich Place,Chinese Restaurant,Pizza Place,Thrift / Vintage Store
6,Scarborough,0,Fast Food Restaurant,Sandwich Place,Chinese Restaurant,Pizza Place,Thrift / Vintage Store
7,Scarborough,0,Fast Food Restaurant,Sandwich Place,Chinese Restaurant,Pizza Place,Thrift / Vintage Store
8,Scarborough,0,Fast Food Restaurant,Sandwich Place,Chinese Restaurant,Pizza Place,Thrift / Vintage Store
9,Scarborough,0,Fast Food Restaurant,Sandwich Place,Chinese Restaurant,Pizza Place,Thrift / Vintage Store


In [70]:
# Rows assigned to cluster 1: keep column 1 (Borough) and columns 5 onward
# (the cluster label followed by the ranked venue columns).
toronto_merged[toronto_merged['Cluster Labels'] == 1].iloc[
    :, [1, *range(5, toronto_merged.shape[1])]
]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
88,Etobicoke,1,Pizza Place,Coffee Shop,Pharmacy,Liquor Store,Pool
89,Etobicoke,1,Pizza Place,Coffee Shop,Pharmacy,Liquor Store,Pool
90,Etobicoke,1,Pizza Place,Coffee Shop,Pharmacy,Liquor Store,Pool
91,Etobicoke,1,Pizza Place,Coffee Shop,Pharmacy,Liquor Store,Pool
92,Etobicoke,1,Pizza Place,Coffee Shop,Pharmacy,Liquor Store,Pool
93,Etobicoke,1,Pizza Place,Coffee Shop,Pharmacy,Liquor Store,Pool
94,Etobicoke,1,Pizza Place,Coffee Shop,Pharmacy,Liquor Store,Pool
95,Etobicoke,1,Pizza Place,Coffee Shop,Pharmacy,Liquor Store,Pool
99,Etobicoke,1,Pizza Place,Coffee Shop,Pharmacy,Liquor Store,Pool
100,Etobicoke,1,Pizza Place,Coffee Shop,Pharmacy,Liquor Store,Pool


In [71]:
# Rows assigned to cluster 2: keep column 1 (Borough) and columns 5 onward
# (the cluster label followed by the ranked venue columns).
toronto_merged[toronto_merged['Cluster Labels'] == 2].iloc[
    :, [1, *range(5, toronto_merged.shape[1])]
]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
85,Queen's Park,2,Bubble Tea Shop,Sushi Restaurant,Sculpture Garden,Yoga Studio,Dumpling Restaurant


In [72]:
# Rows assigned to cluster 3: keep column 1 (Borough) and columns 5 onward
# (the cluster label followed by the ranked venue columns).
toronto_merged[toronto_merged['Cluster Labels'] == 3].iloc[
    :, [1, *range(5, toronto_merged.shape[1])]
]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
86,Mississauga,3,Coffee Shop,Yoga Studio,Dumpling Restaurant,Film Studio,Field


In [73]:
# Rows assigned to cluster 4: keep column 1 (Borough) and columns 5 onward
# (the cluster label followed by the ranked venue columns).
toronto_merged[toronto_merged['Cluster Labels'] == 4].iloc[
    :, [1, *range(5, toronto_merged.shape[1])]
]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
73,York,4,Playground,Sandwich Place,Park,Restaurant,Dog Run
74,York,4,Playground,Sandwich Place,Park,Restaurant,Dog Run
80,York,4,Playground,Sandwich Place,Park,Restaurant,Dog Run
81,York,4,Playground,Sandwich Place,Park,Restaurant,Dog Run
98,York,4,Playground,Sandwich Place,Park,Restaurant,Dog Run
