# Clustering and segmentation for neighborhoods in Toronto

### Needed libraries for the assignment:

In [1]:
import requests
import pandas as pd
from bs4 import BeautifulSoup

### Getting the table from the Wiki page:

In [2]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
website = requests.get(url).text


soup = BeautifulSoup(website,'lxml')


# Getting the table
tablePostalCodes = soup.find('table',{'class':'wikitable sortable'})

# Getting a list with each row of the table
detailPostalCodes = tablePostalCodes.find_all('tr')

# Deleting the row with the heading of the table
del detailPostalCodes[0]




### Building a summarized list of lists of Postal codes of Canada

In [3]:
#Creating an empty list
summarizedCodesCanada = []

#Initializing some temporal variables
tempPostcode = None
tempBorough = None
tempNeighbourhood = None

#Reading each row from detailPostalCodes and building the summarizedCodesCanada list.
for postalCode in detailPostalCodes:
    detailPostalCode = postalCode.find_all('td')
    postcode = detailPostalCode[0].text
    borough = detailPostalCode[1].text
    neighbourhood = detailPostalCode[2].text.rstrip()
    #Only taking into account the rows with a borough
    if borough != 'Not assigned':
        #Assigning the borough to the neighbourhood when there is no neighbourhood.
        if neighbourhood == 'Not assigned':
            neighbourhood = borough
        if tempPostcode != postcode:
            if tempPostcode is not None:
                #Appending the summarized row to the summarized list
                summarizedCodesCanada.append([tempPostcode, tempBorough, tempNeighbourhood])
            #Temporaly storing the current row 
            tempPostcode = postcode
            tempBorough = borough
            tempNeighbourhood = neighbourhood
        else:
            #When there are many neigbourhoods for the same postcode, all neigbourhoods are grouped in the same cell 
            tempNeighbourhood = tempNeighbourhood + ', ' + neighbourhood
        
#Appending the last summarized row 
summarizedCodesCanada.append([tempPostcode, tempBorough, tempNeighbourhood])


### Building the dataframe from the list

In [4]:
# define the dataframe columns
column_names = ['PostalCode', 'Borough', 'Neighborhood']

#Creating the dataframe from the list
dfSummarizedCodesCanada = pd.DataFrame(summarizedCodesCanada, columns = column_names)

# Print first 5 rows
dfSummarizedCodesCanada.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront, Regent Park"
3,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,M7A,Queen's Park,Queen's Park


In [5]:
# Print the number of rows of the dataframe
print(dfSummarizedCodesCanada.shape)


(103, 3)


### Getting latitud and longitude for each PostalCode

In [6]:
# Installing geocoder
!conda install -c conda-forge geocoder --yes

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/DSX-Python35

  added / updated specs: 
    - geocoder


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    certifi-2018.8.24          |        py35_1001         139 KB  conda-forge
    geocoder-1.38.1            |             py_1          53 KB  conda-forge
    ratelim-0.1.6              |             py_2           6 KB  conda-forge
    openssl-1.0.2r             |       h14c3975_0         3.1 MB  conda-forge
    ca-certificates-2019.6.16  |       hecc5488_0         145 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         3.5 MB

The following NEW packages will be INSTALLED:

    geocoder:        1.38.1-py_1       conda-forge
    ratelim:         0.1.6-py_2        conda-forge

The following packages will be UPDATED:



In [7]:
# Importing geocoder

import geocoder

In [8]:
#Getting latitude and longitude for each PostalCode

# Creating empty latitude and longitude lists

latitudeList = []
longitudeList = []

# Getting  latitudeList and longitudeList filled with the data from each PostalCode

for index, postalCode in dfSummarizedCodesCanada.iterrows():
    # Building the address from the PostalCode and the Borough
    # address = '{}, {}'.format(postalCode["PostalCode"], postalCode["Borough"])
    address = postalCode["PostalCode"]
    g = geocoder.arcgis(address)
    lat_lng_coords = g.latlng
    latitude = lat_lng_coords[0]
    longitude = lat_lng_coords[1]
    latitudeList.append(latitude)
    longitudeList.append(longitude)

# Adding Latitude and Longitude columns to the dataframe
dfSummarizedCodesCanada['Latitude'] = latitudeList
dfSummarizedCodesCanada['Longitude'] = longitudeList





In [9]:
# Displayng dataframe.
dfSummarizedCodesCanada

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.752440,-79.329271
1,M4A,North York,Victoria Village,43.730421,-79.313320
2,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.655120,-79.362640
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.723125,-79.451589
4,M7A,Queen's Park,Queen's Park,43.661102,-79.391035
5,M9A,Etobicoke,Islington Avenue,43.662242,-79.528379
6,M1B,Scarborough,"Rouge, Malvern",43.811525,-79.195517
7,M3B,North York,Don Mills North,43.749195,-79.361905
8,M4B,East York,"Woodbine Gardens, Parkview Hill",43.707535,-79.311773
9,M5B,Downtown Toronto,"Ryerson, Garden District",43.657363,-79.378180


### Explore and cluster the neighborhoods in Toronto

In [10]:
# Importing the needed libraries

import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't
#completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude
#and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes 

import folium # map rendering library

print('Libraries imported.')

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/DSX-Python35

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    altair-2.2.2               |           py35_1         462 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    branca-0.3.1               |             py_0          25 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         560 KB

The following NEW packages will be INSTALLED:

    altair:  2.2.2-py35_1 conda-forge
    branca:  0.3.1-py_0   conda-forge
    folium:  0.5.0-py_0   conda-forge
    vincent: 0.4.4-py_1   conda-forge


Downloading and Extracting Packages
altair-2.2.2         | 462 K

### Creating a dataframe with only boroughs that contains the word Toronto

In [11]:
dfToronto = dfSummarizedCodesCanada[dfSummarizedCodesCanada['Borough'].str.contains("Toronto")].reset_index(drop=True)
dfToronto

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65512,-79.36264
1,M5B,Downtown Toronto,"Ryerson, Garden District",43.657363,-79.37818
2,M5C,Downtown Toronto,St. James Town,43.65121,-79.375481
3,M4E,East Toronto,The Beaches,43.676845,-79.295225
4,M5E,Downtown Toronto,Berczy Park,43.64516,-79.373675
5,M5G,Downtown Toronto,Central Bay Street,43.656091,-79.38493
6,M6G,Downtown Toronto,Christie,43.668781,-79.42071
7,M5H,Downtown Toronto,"Adelaide, King, Richmond",43.649515,-79.382503
8,M6H,West Toronto,"Dovercourt Village, Dufferin",43.665087,-79.438705
9,M5J,Downtown Toronto,"Harbourfront East, Toronto Islands, Union Station",43.62347,-79.391507


### Getting Toronto's cordinates

In [12]:
address = 'Toronto, ON'
g = geocoder.arcgis(address)
lat_lng_coords = g.latlng
latitude = lat_lng_coords[0]
longitude = lat_lng_coords[1]
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.648690000000045, -79.38543999999996.


In [13]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=13)

# add markers to map
for lat, lng, label in zip(dfToronto['Latitude'], dfToronto['Longitude'],\
                           dfToronto['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

### Define Foursquare Credentials and Version

In [14]:
CLIENT_ID = 'GPEY0BDPK20BAE1IL5QHEOQZB223IUL5VZEJ2E21XTM1WELQ' # your Foursquare ID
CLIENT_SECRET = 'PFI42R4CS5OXDHUCWOBIQMLWCP4EZRUOXH0BNMPID5QZJQN2' # your Foursquare 
#Secret
VERSION = '20180605' # Foursquare API version

LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: GPEY0BDPK20BAE1IL5QHEOQZB223IUL5VZEJ2E21XTM1WELQ
CLIENT_SECRET:PFI42R4CS5OXDHUCWOBIQMLWCP4EZRUOXH0BNMPID5QZJQN2


### Explore Neighborhoods in Toronto

In [15]:
#function to process all the neighborhoods in Manhattan
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}\
        &client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for\
                                  item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [16]:
# Run the above function on each neighborhood and create a new dataframe called toronto_venues.

toronto_venues = getNearbyVenues(names=dfToronto['Neighborhood'],
                                   latitudes=dfToronto['Latitude'],
                                   longitudes=dfToronto['Longitude']
                                  )

Harbourfront, Regent Park
Ryerson, Garden District
St. James Town
The Beaches
Berczy Park
Central Bay Street
Christie
Adelaide, King, Richmond
Dovercourt Village, Dufferin
Harbourfront East, Toronto Islands, Union Station
Little Portugal, Trinity
The Danforth West, Riverdale
Design Exchange, Toronto Dominion Centre
Brockton, Exhibition Place, Parkdale Village
The Beaches West, India Bazaar
Commerce Court, Victoria Hotel
Studio District
Lawrence Park
Roselawn
Davisville North
Forest Hill North, Forest Hill West
High Park, The Junction South
North Toronto West
The Annex, North Midtown, Yorkville
Parkdale, Roncesvalles
Davisville
Harbord, University of Toronto
Runnymede, Swansea
Moore Park, Summerhill East
Chinatown, Grange Park, Kensington Market
Deer Park, Forest Hill SE, Rathnelly, South Hill, Summerhill West
CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara
Rosedale
Stn A PO Boxes 25 The Esplanade
Cabbagetown, St. James Town
Fir

In [17]:
#Let's check the size of the resulting dataframe
print(toronto_venues.shape)
toronto_venues.head()

(1757, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Harbourfront, Regent Park",43.65512,-79.36264,Roselle Desserts,43.653447,-79.362017,Bakery
1,"Harbourfront, Regent Park",43.65512,-79.36264,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,"Harbourfront, Regent Park",43.65512,-79.36264,Figs Breakfast & Lunch,43.655675,-79.364503,Breakfast Spot
3,"Harbourfront, Regent Park",43.65512,-79.36264,Cocina Economica,43.654959,-79.365657,Mexican Restaurant
4,"Harbourfront, Regent Park",43.65512,-79.36264,Body Blitz Spa East,43.654735,-79.359874,Spa


In [18]:
#Let's check how many venues were returned for each neighborhood

toronto_venues.groupby('Neighborhood').count()


Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, King, Richmond",100,100,100,100,100,100
Berczy Park,62,62,62,62,62,62
"Brockton, Exhibition Place, Parkdale Village",71,71,71,71,71,71
Business Reply Mail Processing Centre 969 Eastern,100,100,100,100,100,100
"CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara",68,68,68,68,68,68
"Cabbagetown, St. James Town",43,43,43,43,43,43
Central Bay Street,100,100,100,100,100,100
"Chinatown, Grange Park, Kensington Market",100,100,100,100,100,100
Christie,10,10,10,10,10,10
Church and Wellesley,82,82,82,82,82,82


In [19]:
# Let's find out how many unique categories can be curated from all the returned venues

print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].\
                                                    unique())))

There are 213 uniques categories.


### Analyze Each Neighborhood

In [20]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="",prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
torontoOnehotColumns = list(toronto_onehot.columns)
neighborhoodIndex = torontoOnehotColumns.index('Neighborhood')

fixed_columns = [torontoOnehotColumns[neighborhoodIndex]] + torontoOnehotColumns[0:neighborhoodIndex] + torontoOnehotColumns[neighborhoodIndex + 1 :]
toronto_onehot = toronto_onehot[fixed_columns]
toronto_onehot.head()

Unnamed: 0,Neighborhood,Afghan Restaurant,American Restaurant,Antique Shop,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Basketball Stadium,Beer Bar,Beer Store,Belgian Restaurant,Bistro,Bookstore,Boutique,Brazilian Restaurant,Breakfast Spot,Brewery,Bubble Tea Shop,Building,Burger Joint,Burrito Place,Bus Line,Butcher,Café,Camera Store,Caribbean Restaurant,Cheese Shop,Chinese Restaurant,Church,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,College Arts Building,College Gym,College Rec Center,Colombian Restaurant,Comfort Food Restaurant,Comic Shop,Concert Hall,Convenience Store,Cosmetics Shop,Costume Shop,Creperie,Cuban Restaurant,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store,Dive Bar,Dog Run,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Fish & Chips Shop,Fish Market,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Truck,Fountain,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Gaming Cafe,Garden,Garden Center,Gastropub,Gay Bar,General Entertainment,General Travel,Gift Shop,Gluten-free Restaurant,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Harbor / Marina,Hardware Store,Health Food Store,Historic Site,History Museum,Hobby Shop,Hookah Bar,Hostel,Hotel,Hotel Bar,Hotpot Restaurant,Ice Cream Shop,Indian Restaurant,Intersection,Irish Pub,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Jewish Restaurant,Juice Bar,Kitchen Supply Store,Korean Restaurant,Lake,Latin American Restaurant,Light Rail Station,Lingerie Store,Liquor Store,Lounge,Mac & Cheese Joint,Malay Restaurant,Market,Martial Arts Dojo,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Movie Theater,Museum,Music Venue,New American Restaurant,Nightclub,Noodle House,Office,Opera House,Optical Shop,Organic Grocery,Other Great Outdoors,Park,Peruvian Restaurant,Pet Store,Pharmacy,Photography Studio,Pilates Studio,Pizza Place,Playground,Plaza,Poke Place,Poutine Place,Pub,Ramen Restaurant,Record Shop,Restaurant,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Sculpture Garden,Seafood Restaurant,Shoe Store,Shopping Mall,Skating Rink,Smoke Shop,Smoothie Shop,Snack Place,Soccer Field,Soup Place,Southern / Soul Food Restaurant,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Sports Club,Steakhouse,Strip Club,Supermarket,Sushi Restaurant,Swim School,Taco Place,Tailor Shop,Taiwanese Restaurant,Tanning Salon,Tapas Restaurant,Tea Room,Tennis Court,Thai Restaurant,Theater,Theme Restaurant,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Yoga Studio
0,"Harbourfront, Regent Park",0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"Harbourfront, Regent Park",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,"Harbourfront, Regent Park",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,"Harbourfront, Regent Park",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,"Harbourfront, Regent Park",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [21]:
#let's examine the new dataframe size

toronto_onehot.shape

(1757, 213)

In [22]:
#let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Afghan Restaurant,American Restaurant,Antique Shop,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Basketball Stadium,Beer Bar,Beer Store,Belgian Restaurant,Bistro,Bookstore,Boutique,Brazilian Restaurant,Breakfast Spot,Brewery,Bubble Tea Shop,Building,Burger Joint,Burrito Place,Bus Line,Butcher,Café,Camera Store,Caribbean Restaurant,Cheese Shop,Chinese Restaurant,Church,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,College Arts Building,College Gym,College Rec Center,Colombian Restaurant,Comfort Food Restaurant,Comic Shop,Concert Hall,Convenience Store,Cosmetics Shop,Costume Shop,Creperie,Cuban Restaurant,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store,Dive Bar,Dog Run,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Fish & Chips Shop,Fish Market,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Truck,Fountain,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Gaming Cafe,Garden,Garden Center,Gastropub,Gay Bar,General Entertainment,General Travel,Gift Shop,Gluten-free Restaurant,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Harbor / Marina,Hardware Store,Health Food Store,Historic Site,History Museum,Hobby Shop,Hookah Bar,Hostel,Hotel,Hotel Bar,Hotpot Restaurant,Ice Cream Shop,Indian Restaurant,Intersection,Irish Pub,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Jewish Restaurant,Juice Bar,Kitchen Supply Store,Korean Restaurant,Lake,Latin American Restaurant,Light Rail Station,Lingerie Store,Liquor Store,Lounge,Mac & Cheese Joint,Malay Restaurant,Market,Martial Arts Dojo,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Movie Theater,Museum,Music Venue,New American Restaurant,Nightclub,Noodle House,Office,Opera House,Optical Shop,Organic Grocery,Other Great Outdoors,Park,Peruvian Restaurant,Pet Store,Pharmacy,Photography Studio,Pilates Studio,Pizza Place,Playground,Plaza,Poke Place,Poutine Place,Pub,Ramen Restaurant,Record Shop,Restaurant,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Sculpture Garden,Seafood Restaurant,Shoe Store,Shopping Mall,Skating Rink,Smoke Shop,Smoothie Shop,Snack Place,Soccer Field,Soup Place,Southern / Soul Food Restaurant,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Sports Club,Steakhouse,Strip Club,Supermarket,Sushi Restaurant,Swim School,Taco Place,Tailor Shop,Taiwanese Restaurant,Tanning Salon,Tapas Restaurant,Tea Room,Tennis Court,Thai Restaurant,Theater,Theme Restaurant,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Yoga Studio
0,"Adelaide, King, Richmond",0.0,0.03,0.0,0.01,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.02,0.0,0.03,0.0,0.01,0.0,0.0,0.0,0.02,0.0,0.01,0.02,0.0,0.0,0.01,0.03,0.01,0.0,0.0,0.07,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.09,0.0,0.0,0.0,0.01,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.01,0.0,0.01,0.0,0.01,0.0,0.02,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.03,0.0,0.02,0.01,0.01,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.03,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.02,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0
1,Berczy Park,0.0,0.0,0.0,0.016129,0.0,0.0,0.0,0.0,0.0,0.0,0.016129,0.032258,0.0,0.0,0.016129,0.032258,0.0,0.016129,0.016129,0.0,0.0,0.0,0.032258,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.032258,0.0,0.0,0.032258,0.0,0.0,0.0,0.016129,0.048387,0.112903,0.0,0.0,0.0,0.0,0.016129,0.0,0.016129,0.0,0.016129,0.0,0.016129,0.0,0.0,0.0,0.0,0.0,0.0,0.016129,0.0,0.0,0.0,0.0,0.0,0.016129,0.0,0.0,0.0,0.0,0.0,0.032258,0.0,0.0,0.016129,0.0,0.0,0.0,0.0,0.0,0.016129,0.016129,0.016129,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016129,0.016129,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.032258,0.0,0.0,0.0,0.0,0.0,0.0,0.016129,0.0,0.016129,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016129,0.032258,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016129,0.0,0.0,0.016129,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016129,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.048387,0.0,0.0,0.0,0.0,0.0,0.032258,0.0,0.016129,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.032258,0.0,0.0,0.0,0.0,0.0,0.016129,0.0,0.0,0.0,0.016129,0.0,0.016129,0.0,0.0,0.0,0.0,0.0,0.0,0.016129,0.0,0.0,0.0,0.0,0.0,0.0
2,"Brockton, Exhibition Place, Parkdale Village",0.0,0.0,0.0,0.028169,0.0,0.014085,0.0,0.0,0.014085,0.0,0.0,0.014085,0.0,0.028169,0.0,0.028169,0.0,0.0,0.0,0.0,0.0,0.0,0.014085,0.0,0.0,0.0,0.014085,0.0,0.0,0.0,0.056338,0.0,0.014085,0.0,0.0,0.0,0.014085,0.0,0.014085,0.084507,0.0,0.0,0.0,0.0,0.014085,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014085,0.014085,0.014085,0.0,0.0,0.0,0.0,0.0,0.0,0.014085,0.014085,0.0,0.0,0.0,0.0,0.014085,0.0,0.0,0.056338,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014085,0.0,0.0,0.0,0.0,0.014085,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028169,0.014085,0.0,0.014085,0.0,0.0,0.0,0.028169,0.014085,0.0,0.0,0.0,0.014085,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014085,0.014085,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014085,0.0,0.0,0.0,0.014085,0.0,0.0,0.0,0.014085,0.014085,0.0,0.0,0.056338,0.0,0.0,0.0,0.042254,0.0,0.014085,0.0,0.0,0.0,0.0,0.014085,0.0,0.0,0.014085,0.0,0.014085,0.0,0.014085,0.0,0.0,0.0,0.0,0.028169,0.0,0.0,0.0,0.0,0.0,0.0,0.014085,0.014085,0.0,0.0,0.014085,0.0,0.0,0.0,0.0,0.0,0.028169,0.0,0.014085,0.0,0.0,0.0,0.0
3,Business Reply Mail Processing Centre 969 Eastern,0.0,0.03,0.0,0.01,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.01,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.09,0.0,0.0,0.0,0.01,0.0,0.0,0.02,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.02,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.01,0.0,0.0,0.03,0.03,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.02,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.01,0.0,0.0,0.03,0.0,0.01,0.02,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.04,0.0,0.0,0.03,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.029412,0.0,0.044118,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.014706,0.0,0.014706,0.0,0.0,0.0,0.044118,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.102941,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.029412,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.014706,0.014706,0.0,0.0,0.0,0.014706,0.014706,0.073529,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.014706,0.0,0.0,0.014706,0.0,0.0,0.014706,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.014706,0.0,0.014706,0.0,0.0,0.0,0.0,0.029412,0.014706,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.029412,0.014706,0.0,0.029412,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.029412,0.0,0.0,0.014706,0.014706,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.014706
5,"Cabbagetown, St. James Town",0.0,0.023256,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.046512,0.023256,0.0,0.0,0.0,0.023256,0.0,0.0,0.0,0.0,0.0,0.023256,0.0,0.0,0.0,0.0,0.0,0.0,0.023256,0.046512,0.0,0.0,0.0,0.046512,0.0,0.0,0.0,0.0,0.069767,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.023256,0.0,0.0,0.023256,0.0,0.023256,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.023256,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.023256,0.0,0.0,0.0,0.0,0.0,0.023256,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.023256,0.0,0.0,0.046512,0.023256,0.0,0.023256,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.023256,0.0,0.0,0.0,0.046512,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.023256,0.0,0.023256,0.023256,0.0,0.0,0.046512,0.023256,0.023256,0.0,0.0,0.023256,0.0,0.0,0.069767,0.0,0.0,0.0,0.023256,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.023256,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.023256,0.0,0.0,0.0,0.0,0.023256,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Central Bay Street,0.0,0.01,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.02,0.0,0.01,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.0,0.06,0.0,0.09,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.03,0.0,0.0,0.0,0.02,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.03,0.01,0.01,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.03,0.01,0.0,0.01,0.01,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.01,0.0,0.03,0.0,0.01,0.01,0.0,0.0,0.01,0.0,0.0,0.01,0.01,0.01,0.01,0.0,0.0,0.0
7,"Chinatown, Grange Park, Kensington Market",0.0,0.0,0.0,0.01,0.0,0.02,0.0,0.0,0.0,0.0,0.01,0.03,0.0,0.04,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.08,0.0,0.02,0.01,0.05,0.0,0.0,0.0,0.02,0.03,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.02,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.03,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.05,0.0,0.04,0.01,0.0,0.0,0.0
8,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Church and Wellesley,0.012195,0.012195,0.0,0.0,0.0,0.012195,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012195,0.0,0.0,0.012195,0.0,0.02439,0.0,0.02439,0.012195,0.0,0.0,0.012195,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.109756,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012195,0.0,0.0,0.02439,0.0,0.0,0.0,0.012195,0.0,0.0,0.012195,0.0,0.0,0.0,0.0,0.012195,0.0,0.0,0.0,0.0,0.02439,0.0,0.0,0.0,0.0,0.0,0.012195,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02439,0.04878,0.012195,0.0,0.0,0.0,0.0,0.0,0.012195,0.012195,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012195,0.0,0.0,0.02439,0.0,0.0,0.012195,0.012195,0.0,0.0,0.012195,0.060976,0.0,0.0,0.0,0.012195,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012195,0.02439,0.012195,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012195,0.0,0.012195,0.0,0.0,0.0,0.012195,0.0,0.0,0.0,0.0,0.02439,0.012195,0.0,0.036585,0.012195,0.012195,0.012195,0.0,0.012195,0.0,0.012195,0.0,0.0,0.012195,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012195,0.0,0.012195,0.012195,0.0,0.036585,0.0,0.0,0.0,0.0,0.0,0.0,0.012195,0.0,0.012195,0.012195,0.012195,0.0,0.0,0.0,0.0,0.0,0.012195,0.012195,0.0,0.0,0.012195,0.0


In [23]:
#Let's confirm the new size
toronto_grouped.shape

(37, 213)

In [24]:
#Let's print each neighborhood along with the top 5 most common venues
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].\
    T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).\
          reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide, King, Richmond----
                 venue  freq
0          Coffee Shop  0.09
1                 Café  0.07
2                Hotel  0.05
3  Japanese Restaurant  0.03
4        Deli / Bodega  0.03


----Berczy Park----
          venue  freq
0   Coffee Shop  0.11
1    Restaurant  0.05
2  Cocktail Bar  0.05
3        Lounge  0.03
4   Cheese Shop  0.03


----Brockton, Exhibition Place, Parkdale Village----
                    venue  freq
0             Coffee Shop  0.08
1              Restaurant  0.06
2                    Café  0.06
3  Furniture / Home Store  0.06
4          Sandwich Place  0.04


----Business Reply Mail Processing Centre 969 Eastern----
         venue  freq
0  Coffee Shop  0.09
1          Bar  0.06
2         Café  0.05
3        Hotel  0.04
4   Steakhouse  0.04


----CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara----
                venue  freq
0         Coffee Shop  0.10
1  Italian Restaurant  0.07
2    

In [25]:
#Let's put that into a pandas dataframe

#First, let's write a function to sort the venues in descending order.

def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]


In [26]:
# Now let's create the new dataframe and display the top 10 venues for each neighborhood.

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = \
    return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Coffee Shop,Café,Hotel,American Restaurant,Steakhouse,Gastropub,Deli / Bodega,Restaurant,Asian Restaurant,Burger Joint
1,Berczy Park,Coffee Shop,Cocktail Bar,Restaurant,Steakhouse,Farmers Market,Café,Lounge,Seafood Restaurant,Beer Bar,Breakfast Spot
2,"Brockton, Exhibition Place, Parkdale Village",Coffee Shop,Café,Furniture / Home Store,Restaurant,Sandwich Place,Bar,Beer Bar,Supermarket,Italian Restaurant,Hotel
3,Business Reply Mail Processing Centre 969 Eastern,Coffee Shop,Bar,Café,Steakhouse,Hotel,American Restaurant,Japanese Restaurant,Pizza Place,Sushi Restaurant,Pub
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",Coffee Shop,Italian Restaurant,Café,Bar,Speakeasy,Bakery,Park,Pub,Restaurant,Gym / Fitness Center


### Cluster Neighborhoods

In [27]:
#Run k-means to cluster the neighborhood into 5 clusters.

# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).\
fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:50] 

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 4, 0, 2, 0, 4, 3,
       0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 4], dtype=int32)

In [28]:
#Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = dfToronto

# merge toronto_grouped with toronto_data to add latitude/longitude 
#for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.\
                                         set_index('Neighborhood'), \
                                         on='Neighborhood')

toronto_merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65512,-79.36264,0.0,Coffee Shop,Gym / Fitness Center,Breakfast Spot,Spa,Thai Restaurant,Food Truck,Mexican Restaurant,Event Space,Restaurant,Electronics Store
1,M5B,Downtown Toronto,"Ryerson, Garden District",43.657363,-79.37818,0.0,Coffee Shop,Clothing Store,Cosmetics Shop,Fast Food Restaurant,Café,Middle Eastern Restaurant,Furniture / Home Store,Tea Room,Ramen Restaurant,Plaza
2,M5C,Downtown Toronto,St. James Town,43.65121,-79.375481,0.0,Coffee Shop,Hotel,Café,Restaurant,Cosmetics Shop,Bakery,Clothing Store,Breakfast Spot,Gastropub,Seafood Restaurant
3,M4E,East Toronto,The Beaches,43.676845,-79.295225,0.0,Health Food Store,Pub,Trail,Yoga Studio,Ethiopian Restaurant,Food,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop
4,M5E,Downtown Toronto,Berczy Park,43.64516,-79.373675,0.0,Coffee Shop,Cocktail Bar,Restaurant,Steakhouse,Farmers Market,Café,Lounge,Seafood Restaurant,Beer Bar,Breakfast Spot


In [29]:
# Some Neighborhoods are not returning data, so I decided to delete those rows
toronto_merged = toronto_merged.dropna(subset=['Cluster Labels'])
toronto_merged = toronto_merged.astype ({"Cluster Labels": int})
toronto_merged

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65512,-79.36264,0,Coffee Shop,Gym / Fitness Center,Breakfast Spot,Spa,Thai Restaurant,Food Truck,Mexican Restaurant,Event Space,Restaurant,Electronics Store
1,M5B,Downtown Toronto,"Ryerson, Garden District",43.657363,-79.37818,0,Coffee Shop,Clothing Store,Cosmetics Shop,Fast Food Restaurant,Café,Middle Eastern Restaurant,Furniture / Home Store,Tea Room,Ramen Restaurant,Plaza
2,M5C,Downtown Toronto,St. James Town,43.65121,-79.375481,0,Coffee Shop,Hotel,Café,Restaurant,Cosmetics Shop,Bakery,Clothing Store,Breakfast Spot,Gastropub,Seafood Restaurant
3,M4E,East Toronto,The Beaches,43.676845,-79.295225,0,Health Food Store,Pub,Trail,Yoga Studio,Ethiopian Restaurant,Food,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop
4,M5E,Downtown Toronto,Berczy Park,43.64516,-79.373675,0,Coffee Shop,Cocktail Bar,Restaurant,Steakhouse,Farmers Market,Café,Lounge,Seafood Restaurant,Beer Bar,Breakfast Spot
5,M5G,Downtown Toronto,Central Bay Street,43.656091,-79.38493,0,Coffee Shop,Clothing Store,Cosmetics Shop,Sushi Restaurant,Fast Food Restaurant,Tea Room,Ice Cream Shop,Middle Eastern Restaurant,Plaza,Spa
6,M6G,Downtown Toronto,Christie,43.668781,-79.42071,0,Café,Grocery Store,Athletics & Sports,Coffee Shop,Italian Restaurant,Baby Store,Playground,Donut Shop,Dog Run,Food & Drink Shop
7,M5H,Downtown Toronto,"Adelaide, King, Richmond",43.649515,-79.382503,0,Coffee Shop,Café,Hotel,American Restaurant,Steakhouse,Gastropub,Deli / Bodega,Restaurant,Asian Restaurant,Burger Joint
8,M6H,West Toronto,"Dovercourt Village, Dufferin",43.665087,-79.438705,0,Park,Pet Store,Furniture / Home Store,Brazilian Restaurant,Middle Eastern Restaurant,Bar,Bank,Bakery,Fast Food Restaurant,Liquor Store
9,M5J,Downtown Toronto,"Harbourfront East, Toronto Islands, Union Station",43.62347,-79.391507,2,Harbor / Marina,Café,Music Venue,Yoga Studio,Event Space,Food,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop


In [30]:
#Finally, let's visualize the resulting clusters
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], \
                                  toronto_merged['Longitude'], \
                                  toronto_merged['Neighborhood'], \
                                  toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# Examine each cluster

## Cluster 1

In [32]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, \
                     toronto_merged.columns[[1] + list(range(5, \
                                                               toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,0,Coffee Shop,Gym / Fitness Center,Breakfast Spot,Spa,Thai Restaurant,Food Truck,Mexican Restaurant,Event Space,Restaurant,Electronics Store
1,Downtown Toronto,0,Coffee Shop,Clothing Store,Cosmetics Shop,Fast Food Restaurant,Café,Middle Eastern Restaurant,Furniture / Home Store,Tea Room,Ramen Restaurant,Plaza
2,Downtown Toronto,0,Coffee Shop,Hotel,Café,Restaurant,Cosmetics Shop,Bakery,Clothing Store,Breakfast Spot,Gastropub,Seafood Restaurant
3,East Toronto,0,Health Food Store,Pub,Trail,Yoga Studio,Ethiopian Restaurant,Food,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop
4,Downtown Toronto,0,Coffee Shop,Cocktail Bar,Restaurant,Steakhouse,Farmers Market,Café,Lounge,Seafood Restaurant,Beer Bar,Breakfast Spot
5,Downtown Toronto,0,Coffee Shop,Clothing Store,Cosmetics Shop,Sushi Restaurant,Fast Food Restaurant,Tea Room,Ice Cream Shop,Middle Eastern Restaurant,Plaza,Spa
6,Downtown Toronto,0,Café,Grocery Store,Athletics & Sports,Coffee Shop,Italian Restaurant,Baby Store,Playground,Donut Shop,Dog Run,Food & Drink Shop
7,Downtown Toronto,0,Coffee Shop,Café,Hotel,American Restaurant,Steakhouse,Gastropub,Deli / Bodega,Restaurant,Asian Restaurant,Burger Joint
8,West Toronto,0,Park,Pet Store,Furniture / Home Store,Brazilian Restaurant,Middle Eastern Restaurant,Bar,Bank,Bakery,Fast Food Restaurant,Liquor Store
10,West Toronto,0,Bar,Coffee Shop,Cocktail Bar,Restaurant,Asian Restaurant,Pizza Place,New American Restaurant,Bakery,French Restaurant,Vietnamese Restaurant


This would be the Food cluster. In this cluster you can find different types of restaurants, cafes and Bars.

## Cluster 2

In [33]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, \
                     toronto_merged.columns[[1] + list(range(5, \
                                                               toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
22,Central Toronto,1,Playground,Gym Pool,Park,Garden,Ethiopian Restaurant,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop,Fast Food Restaurant
32,Downtown Toronto,1,Playground,Building,Park,Bank,Event Space,Food & Drink Shop,Food,Flower Shop,Flea Market,Fish Market


This would be the Sports cluster. You would find in this cluster open spaces for outdoor activities and sports

## Cluster 3

In [34]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, \
                     toronto_merged.columns[[1] + list(range(5, \
                                                               toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
9,Downtown Toronto,2,Harbor / Marina,Café,Music Venue,Yoga Studio,Event Space,Food,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop


This would be the Harbor cluster. An open space for activiites related to the harbor.

## Cluster 4

In [35]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, \
                     toronto_merged.columns[[1] + list(range(5, \
                                                               toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
17,Central Toronto,3,Photography Studio,Bus Line,Swim School,Event Space,Food,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop,Fast Food Restaurant


This would be Photography cluster. This is the only cluster where you can find photography studios and it is the most common venue too

## Cluster 5

In [36]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, \
                     toronto_merged.columns[[1] + list(range(5, \
                                                               toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
11,East Toronto,4,Bus Line,Discount Store,Park,Grocery Store,Yoga Studio,Event Space,Food,Flower Shop,Flea Market,Fish Market
19,Central Toronto,4,Food & Drink Shop,Hotel,Park,Gym / Fitness Center,Gym,Breakfast Spot,Clothing Store,Falafel Restaurant,Food,Flower Shop
20,Central Toronto,4,Arts & Crafts Store,Park,Yoga Studio,Event Space,Food & Drink Shop,Food,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop
21,West Toronto,4,Convenience Store,Park,Sandwich Place,Yoga Studio,Ethiopian Restaurant,Food,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop


This would be the Balanced Cluster. Probably in this cluster you can fullfill all your needs without going to another cluster