# The Battle of the Neighborhoods

##### Author: Omer Aytac

## Imports

In [5]:
# imports 
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

! pip install folium==0.5.0
import folium # map rendering library

print('Libraries imported.')

Collecting folium==0.5.0
  Downloading folium-0.5.0.tar.gz (79 kB)
[K     |████████████████████████████████| 79 kB 6.3 MB/s eta 0:00:011
[?25hCollecting branca
  Downloading branca-0.4.1-py3-none-any.whl (24 kB)
Building wheels for collected packages: folium
  Building wheel for folium (setup.py) ... [?25ldone
[?25h  Created wheel for folium: filename=folium-0.5.0-py3-none-any.whl size=76240 sha256=5d16c86df31d34e5c686d472e15be9e3edad341b83ba70144fc9b908f1ffaac4
  Stored in directory: /tmp/wsuser/.cache/pip/wheels/b2/2f/2c/109e446b990d663ea5ce9b078b5e7c1a9c45cca91f377080f8
Successfully built folium
Installing collected packages: branca, folium
Successfully installed branca-0.4.1 folium-0.5.0
Libraries imported.


## Data

#### Getting the Table from Wikipedia

In [6]:
table = pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')
# type(table)
df = pd.DataFrame(table[0])
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


#### Cleaning the rows with "Not assigned" in Borough Column

In [7]:
df_cleaned = df[df["Borough"] != "Not assigned"]
df_cleaned.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


#### Check whether there is a row with "Not assigned" neighbourhood or not

In [8]:
df_temp = df_cleaned[df_cleaned["Neighbourhood"] == "Not assigned"]
df_temp.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood


#### Count the Postal Code Values to check the rows if there are more than one row with same Postal Code

In [9]:
print(len(df_cleaned['Postal Code'].unique()))
print(df_cleaned.shape)

103
(103, 3)


#### Add the latitued and longitude to our dataframe from the csv that is downloaded

In [10]:
!wget -q -O 'toronto_data.csv' http://cocl.us/Geospatial_data
df_locations = pd.read_csv("toronto_data.csv")
df_locations.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [11]:
df_new = df_cleaned.merge(df_locations, on="Postal Code", how = 'inner')
df_new.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


#### Use geopy library to get the latitude and longitude values of Toronto.

In [12]:
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="foursquare_agent")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


#### Create a map of Toronto with neighborhoods superimposed on top.

In [13]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df_new['Latitude'], df_new['Longitude'], df_new['Borough'], df_new['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [14]:
downtown_data = df_new[df_new['Borough'] == 'Downtown Toronto'].reset_index(drop=True)
downtown_data.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306


In [15]:
address = 'Downtown Toronto, ON'

geolocator = Nominatim(user_agent="foursquare_agent")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Downtown Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Downtown Toronto are 43.6563221, -79.3809161.


In [18]:
# create map of Manhattan using latitude and longitude values
map_downtown = folium.Map(location=[latitude, longitude], zoom_start=12)

# add markers to map
for lat, lng, label in zip(downtown_data['Latitude'], downtown_data['Longitude'], downtown_data['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_downtown)  
    
map_downtown

#### Foursquare

In [30]:
CLIENT_ID = 'N4P2TZNKUUUJI4A42H5DIOZROY1FXMCVJN24040XR4LIR0YD' # your Foursquare ID
CLIENT_SECRET = 'JR2X5FNJRW25BA4WBN5ZOARLHNLYLBNG2I4WBKLBQBAQGE4X' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

foods = '4d4b7105d754a06374d81259' # 'Root' category for all food-related venues

Your credentails:
CLIENT_ID: N4P2TZNKUUUJI4A42H5DIOZROY1FXMCVJN24040XR4LIR0YD
CLIENT_SECRET:JR2X5FNJRW25BA4WBN5ZOARLHNLYLBNG2I4WBKLBQBAQGE4X


In [31]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&categoryId={}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            foods,
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [32]:
downtown_venues = getNearbyVenues(names=downtown_data['Neighbourhood'],
                                   latitudes=downtown_data['Latitude'],
                                   longitudes=downtown_data['Longitude']
                                  )

Regent Park, Harbourfront
Queen's Park, Ontario Provincial Government
Garden District, Ryerson
St. James Town
Berczy Park
Central Bay Street
Christie
Richmond, Adelaide, King
Harbourfront East, Union Station, Toronto Islands
Toronto Dominion Centre, Design Exchange
Commerce Court, Victoria Hotel
University of Toronto, Harbord
Kensington Market, Chinatown, Grange Park
CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport
Rosedale
Stn A PO Boxes
St. James Town, Cabbagetown
First Canadian Place, Underground city
Church and Wellesley


In [33]:
downtown_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Regent Park, Harbourfront",43.65426,-79.360636,Impact Kitchen,43.656369,-79.35698,Restaurant
1,"Regent Park, Harbourfront",43.65426,-79.360636,Figs Breakfast & Lunch,43.655675,-79.364503,Breakfast Spot
2,"Regent Park, Harbourfront",43.65426,-79.360636,Morning Glory Cafe,43.653947,-79.361149,Breakfast Spot
3,"Regent Park, Harbourfront",43.65426,-79.360636,Brick Street Bakery,43.650574,-79.359539,Bakery
4,"Regent Park, Harbourfront",43.65426,-79.360636,Cluny Bistro & Boulangerie,43.650565,-79.357843,French Restaurant


In [34]:
downtown_venues.shape

(1092, 7)

In [35]:
print('There are {} uniques categories.'.format(len(downtown_venues['Venue Category'].unique())))

There are 83 uniques categories.


In [36]:
downtown_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Berczy Park,50,50,50,50,50,50
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",2,2,2,2,2,2
Central Bay Street,69,69,69,69,69,69
Christie,6,6,6,6,6,6
Church and Wellesley,63,63,63,63,63,63
"Commerce Court, Victoria Hotel",100,100,100,100,100,100
"First Canadian Place, Underground city",100,100,100,100,100,100
"Garden District, Ryerson",95,95,95,95,95,95
"Harbourfront East, Union Station, Toronto Islands",83,83,83,83,83,83
"Kensington Market, Chinatown, Grange Park",56,56,56,56,56,56


In [37]:
# one hot encoding
downtown_onehot = pd.get_dummies(downtown_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
downtown_onehot['Neighborhood'] = downtown_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [downtown_onehot.columns[-1]] + list(downtown_onehot.columns[:-1])
downtown_onehot = downtown_onehot[fixed_columns]

downtown_onehot.head()

Unnamed: 0,Neighborhood,Afghan Restaurant,American Restaurant,Arepa Restaurant,Asian Restaurant,BBQ Joint,Bagel Shop,Bakery,Belgian Restaurant,Bistro,Brazilian Restaurant,Breakfast Spot,Burger Joint,Burrito Place,Cafeteria,Café,Caribbean Restaurant,Chinese Restaurant,Colombian Restaurant,Comfort Food Restaurant,Creperie,Deli / Bodega,Dim Sum Restaurant,Diner,Doner Restaurant,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Empanada Restaurant,Ethiopian Restaurant,Falafel Restaurant,Fast Food Restaurant,Filipino Restaurant,Fish & Chips Shop,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Gastropub,German Restaurant,Gluten-free Restaurant,Greek Restaurant,Halal Restaurant,Hot Dog Joint,Indian Restaurant,Irish Pub,Italian Restaurant,Japanese Restaurant,Korean Restaurant,Latin American Restaurant,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Modern European Restaurant,Molecular Gastronomy Restaurant,Moroccan Restaurant,New American Restaurant,Noodle House,North Indian Restaurant,Persian Restaurant,Pizza Place,Poke Place,Polish Restaurant,Portuguese Restaurant,Poutine Place,Ramen Restaurant,Restaurant,Salad Place,Sandwich Place,Seafood Restaurant,Snack Place,Soup Place,Spanish Restaurant,Sri Lankan Restaurant,Steakhouse,Sushi Restaurant,Taco Place,Taiwanese Restaurant,Tapas Restaurant,Thai Restaurant,Theme Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wings Joint
0,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,"Regent Park, Harbourfront",0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [38]:
downtown_onehot.shape

(1092, 84)

In [39]:
downtown_grouped = downtown_onehot.groupby('Neighborhood').mean().reset_index()
downtown_grouped

Unnamed: 0,Neighborhood,Afghan Restaurant,American Restaurant,Arepa Restaurant,Asian Restaurant,BBQ Joint,Bagel Shop,Bakery,Belgian Restaurant,Bistro,Brazilian Restaurant,Breakfast Spot,Burger Joint,Burrito Place,Cafeteria,Café,Caribbean Restaurant,Chinese Restaurant,Colombian Restaurant,Comfort Food Restaurant,Creperie,Deli / Bodega,Dim Sum Restaurant,Diner,Doner Restaurant,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Empanada Restaurant,Ethiopian Restaurant,Falafel Restaurant,Fast Food Restaurant,Filipino Restaurant,Fish & Chips Shop,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Gastropub,German Restaurant,Gluten-free Restaurant,Greek Restaurant,Halal Restaurant,Hot Dog Joint,Indian Restaurant,Irish Pub,Italian Restaurant,Japanese Restaurant,Korean Restaurant,Latin American Restaurant,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Modern European Restaurant,Molecular Gastronomy Restaurant,Moroccan Restaurant,New American Restaurant,Noodle House,North Indian Restaurant,Persian Restaurant,Pizza Place,Poke Place,Polish Restaurant,Portuguese Restaurant,Poutine Place,Ramen Restaurant,Restaurant,Salad Place,Sandwich Place,Seafood Restaurant,Snack Place,Soup Place,Spanish Restaurant,Sri Lankan Restaurant,Steakhouse,Sushi Restaurant,Taco Place,Taiwanese Restaurant,Tapas Restaurant,Thai Restaurant,Theme Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wings Joint
0,Berczy Park,0.0,0.02,0.0,0.0,0.0,0.02,0.08,0.02,0.04,0.0,0.02,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.04,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.02,0.02,0.06,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.06,0.0,0.08,0.04,0.0,0.0,0.0,0.0,0.04,0.06,0.0,0.0,0.02,0.02,0.0,0.04,0.0,0.0
1,"CN Tower, King and Spadina, Railway Lands, Har...",0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0
2,Central Bay Street,0.014493,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028986,0.0,0.0,0.115942,0.0,0.057971,0.0,0.0,0.0,0.028986,0.0,0.014493,0.0,0.014493,0.0,0.0,0.0,0.0,0.028986,0.028986,0.0,0.0,0.028986,0.0,0.014493,0.043478,0.014493,0.0,0.0,0.0,0.0,0.0,0.014493,0.0,0.057971,0.043478,0.014493,0.0,0.014493,0.0,0.028986,0.014493,0.0,0.0,0.014493,0.0,0.0,0.0,0.057971,0.014493,0.0,0.014493,0.0,0.014493,0.043478,0.028986,0.086957,0.014493,0.0,0.0,0.0,0.0,0.014493,0.028986,0.0,0.0,0.0,0.028986,0.0,0.014493,0.0,0.0
3,Christie,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Church and Wellesley,0.015873,0.015873,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015873,0.047619,0.031746,0.0,0.047619,0.015873,0.015873,0.0,0.0,0.015873,0.0,0.0,0.015873,0.0,0.0,0.0,0.0,0.0,0.015873,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.015873,0.0,0.0,0.0,0.0,0.0,0.031746,0.0,0.015873,0.111111,0.031746,0.0,0.031746,0.031746,0.015873,0.0,0.0,0.0,0.0,0.0,0.0,0.015873,0.047619,0.015873,0.015873,0.0,0.0,0.031746,0.047619,0.0,0.0,0.015873,0.0,0.015873,0.0,0.0,0.015873,0.111111,0.0,0.0,0.0,0.015873,0.015873,0.0,0.031746,0.015873
5,"Commerce Court, Victoria Hotel",0.0,0.06,0.0,0.03,0.0,0.0,0.03,0.01,0.0,0.0,0.03,0.03,0.01,0.0,0.09,0.0,0.01,0.0,0.0,0.01,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.01,0.01,0.01,0.02,0.0,0.02,0.01,0.0,0.0,0.0,0.01,0.07,0.04,0.01,0.01,0.0,0.01,0.0,0.0,0.01,0.01,0.01,0.0,0.0,0.0,0.03,0.01,0.0,0.0,0.0,0.0,0.1,0.05,0.03,0.04,0.0,0.01,0.0,0.0,0.01,0.02,0.01,0.0,0.0,0.03,0.0,0.03,0.0,0.0
6,"First Canadian Place, Underground city",0.0,0.07,0.0,0.03,0.0,0.0,0.03,0.0,0.0,0.01,0.03,0.03,0.02,0.0,0.1,0.0,0.02,0.01,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.01,0.02,0.0,0.02,0.01,0.0,0.0,0.0,0.01,0.05,0.04,0.0,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.07,0.05,0.04,0.04,0.0,0.01,0.0,0.0,0.03,0.04,0.01,0.0,0.0,0.02,0.0,0.02,0.0,0.0
7,"Garden District, Ryerson",0.0,0.0,0.0,0.010526,0.0,0.0,0.031579,0.0,0.0,0.0,0.021053,0.042105,0.010526,0.0,0.105263,0.010526,0.031579,0.0,0.0,0.0,0.010526,0.0,0.021053,0.0,0.0,0.0,0.0,0.0,0.010526,0.031579,0.052632,0.0,0.0,0.010526,0.010526,0.0,0.021053,0.010526,0.0,0.0,0.021053,0.010526,0.0,0.0,0.0,0.031579,0.052632,0.0,0.0,0.010526,0.010526,0.052632,0.010526,0.0,0.0,0.010526,0.0,0.0,0.0,0.073684,0.0,0.0,0.0,0.010526,0.021053,0.073684,0.010526,0.063158,0.010526,0.0,0.0,0.0,0.0,0.010526,0.031579,0.0,0.0,0.0,0.031579,0.0,0.0,0.010526,0.0
8,"Harbourfront East, Union Station, Toronto Islands",0.0,0.012048,0.0,0.0,0.0,0.012048,0.012048,0.0,0.012048,0.0,0.012048,0.0,0.0,0.024096,0.084337,0.0,0.060241,0.0,0.0,0.0,0.024096,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012048,0.072289,0.0,0.0,0.036145,0.012048,0.0,0.036145,0.0,0.0,0.0,0.012048,0.0,0.024096,0.012048,0.0,0.060241,0.036145,0.0,0.0,0.024096,0.012048,0.012048,0.0,0.0,0.0,0.012048,0.0,0.012048,0.0,0.072289,0.0,0.0,0.0,0.0,0.0,0.13253,0.036145,0.024096,0.012048,0.012048,0.0,0.0,0.0,0.036145,0.012048,0.0,0.0,0.0,0.012048,0.0,0.012048,0.0,0.0
9,"Kensington Market, Chinatown, Grange Park",0.0,0.0,0.017857,0.0,0.0,0.017857,0.071429,0.017857,0.017857,0.0,0.017857,0.035714,0.017857,0.0,0.125,0.035714,0.017857,0.0,0.017857,0.0,0.017857,0.017857,0.0,0.017857,0.035714,0.035714,0.0,0.017857,0.0,0.0,0.0,0.017857,0.017857,0.017857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017857,0.017857,0.0,0.017857,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.017857,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.017857,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017857,0.0,0.089286,0.053571,0.0


In [40]:
downtown_grouped.shape

(19, 84)

In [56]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = downtown_grouped['Neighborhood']

for ind in np.arange(downtown_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(downtown_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Bakery,Sandwich Place,Restaurant,Italian Restaurant,Sushi Restaurant,Moroccan Restaurant,Greek Restaurant,Diner,Bistro,Seafood Restaurant
1,"CN Tower, King and Spadina, Railway Lands, Har...",American Restaurant,Tapas Restaurant,Wings Joint,Fast Food Restaurant,Diner,Doner Restaurant,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Empanada Restaurant
2,Central Bay Street,Café,Sandwich Place,Pizza Place,Chinese Restaurant,Italian Restaurant,Restaurant,Fried Chicken Joint,Japanese Restaurant,Falafel Restaurant,Middle Eastern Restaurant
3,Christie,Café,American Restaurant,Italian Restaurant,Restaurant,Wings Joint,Falafel Restaurant,Doner Restaurant,Donut Shop,Dumpling Restaurant,Eastern European Restaurant
4,Church and Wellesley,Sushi Restaurant,Japanese Restaurant,Pizza Place,Burger Joint,Café,Restaurant,Fast Food Restaurant,Mexican Restaurant,Vietnamese Restaurant,Mediterranean Restaurant


#### Cluster

In [57]:
# set number of clusters
kclusters = 5

downtown_grouped_clustering = downtown_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(downtown_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([2, 3, 2, 4, 2, 2, 2, 2, 2, 2], dtype=int32)

In [58]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

downtown_merged = downtown_data.rename(columns={'Neighbourhood': 'Neighborhood'})
# merge manhattan_grouped with manhattan_data to add latitude/longitude for each neighborhood
downtown_merged = downtown_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

downtown_merged.head() # check the last columns!

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,0,Café,Restaurant,Bakery,Breakfast Spot,Gastropub,Mexican Restaurant,Chinese Restaurant,Japanese Restaurant,Sandwich Place,Seafood Restaurant
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,2,Sushi Restaurant,Café,Diner,Wings Joint,Middle Eastern Restaurant,Portuguese Restaurant,Burrito Place,Chinese Restaurant,Creperie,Deli / Bodega
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,2,Café,Restaurant,Pizza Place,Sandwich Place,Fast Food Restaurant,Middle Eastern Restaurant,Japanese Restaurant,Burger Joint,Chinese Restaurant,Italian Restaurant
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,2,Restaurant,Café,Italian Restaurant,Sushi Restaurant,Bakery,American Restaurant,Breakfast Spot,Gastropub,Moroccan Restaurant,Seafood Restaurant
4,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,2,Bakery,Sandwich Place,Restaurant,Italian Restaurant,Sushi Restaurant,Moroccan Restaurant,Greek Restaurant,Diner,Bistro,Seafood Restaurant


In [59]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(downtown_merged['Latitude'], downtown_merged['Longitude'], downtown_merged['Neighborhood'], downtown_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Analysis

In [87]:
optimal_list = []
columns = list(downtown_merged)
columns = columns[11:16]
columns

['6th Most Common Venue',
 '7th Most Common Venue',
 '8th Most Common Venue',
 '9th Most Common Venue',
 '10th Most Common Venue']

In [88]:
#df_temp1 = downtown_merged[downtown_merged[c] == "Restaurant"]
df_temp1 = downtown_merged[downtown_merged["5th Most Common Venue"] == "Mediterranean Restaurant"]
df_temp2 = downtown_merged[downtown_merged["5th Most Common Venue"] == "Moroccan Restaurant"]
df_temp3 = downtown_merged[downtown_merged["5th Most Common Venue"] == "Greek Restaurant"]
df_temp = pd.concat([df_temp1, df_temp2, df_temp3])
for i in columns:
    df_temp1 = downtown_merged[downtown_merged[i] == "Mediterranean Restaurant"]
    df_temp2 = downtown_merged[downtown_merged[i] == "Moroccan Restaurant"]
    df_temp3 = downtown_merged[downtown_merged[i] == "Greek Restaurant"]
    df_temp = pd.concat([df_temp, df_temp1, df_temp2, df_temp3])
df_temp

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,2,Bakery,Sandwich Place,Restaurant,Italian Restaurant,Sushi Restaurant,Moroccan Restaurant,Greek Restaurant,Diner,Bistro,Seafood Restaurant
4,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,2,Bakery,Sandwich Place,Restaurant,Italian Restaurant,Sushi Restaurant,Moroccan Restaurant,Greek Restaurant,Diner,Bistro,Seafood Restaurant
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,2,Restaurant,Café,Italian Restaurant,Sushi Restaurant,Bakery,American Restaurant,Breakfast Spot,Gastropub,Moroccan Restaurant,Seafood Restaurant
18,M4Y,Downtown Toronto,Church and Wellesley,43.66586,-79.38316,2,Sushi Restaurant,Japanese Restaurant,Pizza Place,Burger Joint,Café,Restaurant,Fast Food Restaurant,Mexican Restaurant,Vietnamese Restaurant,Mediterranean Restaurant
