### This Notebook will be used for the neighbourhood analysis capstone project for coursera

In [38]:
import pandas as pd
import numpy as np

#### Reading the contents from a wikipedia page

In [39]:
table=pd.read_html("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")

#### Retrieving the contents of the table to a dataframe

In [40]:
df=table[0]

#### Setting Postal Code as the index of the dataframe

In [41]:
df.set_index(df["Postal Code"], inplace=True)

In [42]:
df.drop("Postal Code", axis=1, inplace=True)

In [43]:
df

Unnamed: 0_level_0,Borough,Neighbourhood
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M1A,Not assigned,Not assigned
M2A,Not assigned,Not assigned
M3A,North York,Parkwoods
M4A,North York,Victoria Village
M5A,Downtown Toronto,"Regent Park, Harbourfront"
...,...,...
M5Z,Not assigned,Not assigned
M6Z,Not assigned,Not assigned
M7Z,Not assigned,Not assigned
M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,..."


#### Removing any boroughs which are not assigned

In [44]:
df=df[df.Borough!="Not assigned"]
df

Unnamed: 0_level_0,Borough,Neighbourhood
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M3A,North York,Parkwoods
M4A,North York,Victoria Village
M5A,Downtown Toronto,"Regent Park, Harbourfront"
M6A,North York,"Lawrence Manor, Lawrence Heights"
M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...
M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
M4Y,Downtown Toronto,Church and Wellesley
M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


#### Checking for unassigned neighbourhoods with assigned boroughs

In [45]:
df[df.Neighbourhood=="Not assigned"]

Unnamed: 0_level_0,Borough,Neighbourhood
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1


In [46]:
df.shape

(103, 2)

#### Getting the Geographical location data of every neighbourhood in every borough

In [50]:
import io
import requests
url="https://raw.githubusercontent.com/saisreeshma/Neighbourhood_analysis/master/datsets/Geospatial_Coordinates.csv"
s=requests.get(url).content
ll_data=pd.read_csv(io.StringIO(s.decode('utf-8')), index_col="Postal Code")
ll_data

Unnamed: 0_level_0,Latitude,Longitude
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,43.806686,-79.194353
M1C,43.784535,-79.160497
M1E,43.763573,-79.188711
M1G,43.770992,-79.216917
M1H,43.773136,-79.239476
...,...,...
M9N,43.706876,-79.518188
M9P,43.696319,-79.532242
M9R,43.688905,-79.554724
M9V,43.739416,-79.588437


#### Assigning the latitudes and longitudes to their respective locations

In [51]:
df["Latitude"]=ll_data["Latitude"]
df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["Latitude"]=ll_data["Latitude"]


Unnamed: 0_level_0,Borough,Neighbourhood,Latitude
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
M3A,North York,Parkwoods,43.753259
M4A,North York,Victoria Village,43.725882
M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260
M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518
M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301
...,...,...,...
M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654
M4Y,Downtown Toronto,Church and Wellesley,43.665860
M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744
M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258


In [52]:
df["Longitude"]=ll_data["Longitude"]
df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["Longitude"]=ll_data["Longitude"]


Unnamed: 0_level_0,Borough,Neighbourhood,Latitude,Longitude
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
M3A,North York,Parkwoods,43.753259,-79.329656
M4A,North York,Victoria Village,43.725882,-79.315572
M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
...,...,...,...,...
M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558
M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509


In [53]:
from geopy.geocoders import Nominatim 

import requests
from pandas.io.json import json_normalize


import matplotlib.cm as cm
import matplotlib.colors as colors


from sklearn.cluster import KMeans


import folium
print('Libraries imported.')

Libraries imported.


#### Obtaining the location of Toronto, Canada to create a map

In [54]:
address="Toronto, Canada"
geolocator=Nominatim(user_agent="Toronto_Locator")
location=geolocator.geocode(address)
longitude=location.longitude
latitude=location.latitude
print("Goegraphical co-ordinates of Toronto:({},{})".format(longitude,latitude))

Goegraphical co-ordinates of Toronto:(-79.3839347,43.6534817)


#### Generating the map of Canada with each location marked with a circle

In [55]:
map_toronto=folium.Map(location=[latitude,longitude], zoom_level=10)

for lat, long, borough, neighbourhood in zip(df["Latitude"], df["Longitude"], df["Borough"], df["Neighbourhood"]):
    label='{},{}'.format(neighbourhood, borough)
    label=folium.Popup(label,parse_html=True)
    folium.CircleMarker(
    [lat, long],
    radius=3,
    popup=label,
    color='blue',
    fill=True,
    fill_colour='#3186cc',
    fill_opacity=0.7,
    parse_html=False).add_to(map_toronto)
    
map_toronto

#### Generating a clustered map to cluster nearby locations

In [57]:
from folium import plugins

map_toronto=folium.Map(location=[latitude,longitude], zoom_level=10)

clusters=plugins.MarkerCluster().add_to(map_toronto)

for lat, long, borough, neighbourhood in zip(df["Latitude"], df["Longitude"], df["Borough"], df["Neighbourhood"]):
    label='{},{}'.format(neighbourhood, borough)
    label=folium.Popup(label,parse_html=True)
    folium.Marker(
    location=[lat, long],
    popup=label).add_to(clusters)
    
map_toronto


In [58]:
CLIENT_ID = '5B3DP3MB2VUYBXM1U5C0FRPFNKVNL1O1TAHNONMREM5RWOBP' # your Foursquare ID
CLIENT_SECRET = 'WCEG545ZKN1NCDU2VTEWMPS023YC2LLJDUHQAPOMNFLRZAIN' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 5B3DP3MB2VUYBXM1U5C0FRPFNKVNL1O1TAHNONMREM5RWOBP
CLIENT_SECRET:WCEG545ZKN1NCDU2VTEWMPS023YC2LLJDUHQAPOMNFLRZAIN


#### Exploring the 'M4A' neighbourhood in our dataframe

In [79]:
df.loc['M4A','Neighbourhood']

'Victoria Village'

In [80]:
neighbourhood_latitude=df.loc['M4A','Latitude']
neighbourhood_longitude=df.loc['M4A', 'Longitude']
neighbourhood_name=df.loc['M4A','Neighbourhood']
print("Neighbourhood: {} in the location ({},{})".format(neighbourhood_name, neighbourhood_latitude, neighbourhood_longitude))

Neighbourhood: Victoria Village in the location (43.725882299999995,-79.31557159999998)


#### Let's see top 100 venues in the Victoria Village Neighbourhood within 500meters radius

In [81]:
LIMIT=100;
RADIUS=500
url='https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&ll={},{}&v={}&limit={}&radius={}'.format(CLIENT_ID,CLIENT_SECRET,neighbourhood_latitude,
                                                                                            neighbourhood_longitude,VERSION,LIMIT,RADIUS)


In [82]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5f1ed356aa50f96509841f34'},
 'response': {'headerLocation': 'Bermondsey',
  'headerFullLocation': 'Bermondsey, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 5,
  'suggestedBounds': {'ne': {'lat': 43.7303823045, 'lng': -79.30935618239715},
   'sw': {'lat': 43.72138229549999, 'lng': -79.32178701760282}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4c633acb86b6be9a61268e34',
       'name': 'Victoria Village Arena',
       'location': {'lat': 43.72348055545508,
        'lng': -79.31563520925143,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.72348055545508,
          'lng': -79.31563520925143}],
        'distance': 267,
        'cc': 'CA',
        'country': 'Canada',
        'formatte

In [83]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

#### Getting top venues from neighbourhood and cleaning the data

In [89]:
venues=results['response']['groups'][0]['items']
nearby_venues=pd.json_normalize(venues)
filtered_columns=['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues=nearby_venues.loc[:,filtered_columns]
nearby_venues

Unnamed: 0,venue.name,venue.categories,venue.location.lat,venue.location.lng
0,Victoria Village Arena,"[{'id': '4bf58dd8d48988d185941735', 'name': 'H...",43.723481,-79.315635
1,Portugril,"[{'id': '4def73e84765ae376e57713a', 'name': 'P...",43.725819,-79.312785
2,Tim Hortons,"[{'id': '4bf58dd8d48988d1e0931735', 'name': 'C...",43.725517,-79.313103
3,The Frig,"[{'id': '4bf58dd8d48988d10c941735', 'name': 'F...",43.727051,-79.317418
4,Pizza Nova,"[{'id': '4bf58dd8d48988d1ca941735', 'name': 'P...",43.725824,-79.31286


In [90]:
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)
nearby_venues

Unnamed: 0,venue.name,venue.categories,venue.location.lat,venue.location.lng
0,Victoria Village Arena,Hockey Arena,43.723481,-79.315635
1,Portugril,Portuguese Restaurant,43.725819,-79.312785
2,Tim Hortons,Coffee Shop,43.725517,-79.313103
3,The Frig,French Restaurant,43.727051,-79.317418
4,Pizza Nova,Pizza Place,43.725824,-79.31286


In [91]:
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]
nearby_venues.columns

Index(['name', 'categories', 'lat', 'lng'], dtype='object')

In [92]:
nearby_venues

Unnamed: 0,name,categories,lat,lng
0,Victoria Village Arena,Hockey Arena,43.723481,-79.315635
1,Portugril,Portuguese Restaurant,43.725819,-79.312785
2,Tim Hortons,Coffee Shop,43.725517,-79.313103
3,The Frig,French Restaurant,43.727051,-79.317418
4,Pizza Nova,Pizza Place,43.725824,-79.31286


#### Plotting Venues near the VIctoria Village

In [98]:
latitude=43.725882299999995
longitude=-79.31557159999998
map_victoria=folium.Map(location=[latitude,longitude], zoom_level=100)

for lat, long, name, category in zip(nearby_venues["lat"], nearby_venues["lng"], nearby_venues["name"], nearby_venues["categories"]):
    label='Name: {}, Type: {}'.format(name, category)
    label=folium.Popup(label,parse_html=True)
    folium.CircleMarker(
    [lat, long],
    radius=3,
    popup=label,
    color='blue',
    fill=True,
    fill_colour='#3186cc',
    fill_opacity=0.7,
    parse_html=False).add_to(map_victoria)
    
map_victoria

#### Function to repeat the process for all neighbourhoods of Toronto

In [100]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
        results=requests.get(url).json()['response']['groups'][0]['items']
        
        venues_list.append([(
        name, 
        lat, 
        lng,
        v['venue']['name'],
        v['venue']['location']['lat'],
        v['venue']['location']['lng'],
        v['venue']['categories'][0]['name']) for v in results])
        
    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [101]:
toronto_venues = getNearbyVenues(names=df['Neighbourhood'],
                                   latitudes=df['Latitude'],
                                   longitudes=df['Longitude']
                                  )

Parkwoods
Victoria Village
Regent Park, Harbourfront
Lawrence Manor, Lawrence Heights
Queen's Park, Ontario Provincial Government
Islington Avenue, Humber Valley Village
Malvern, Rouge
Don Mills
Parkview Hill, Woodbine Gardens
Garden District, Ryerson
Glencairn
West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale
Rouge Hill, Port Union, Highland Creek
Don Mills
Woodbine Heights
St. James Town
Humewood-Cedarvale
Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood
Guildwood, Morningside, West Hill
The Beaches
Berczy Park
Caledonia-Fairbanks
Woburn
Leaside
Central Bay Street
Christie
Cedarbrae
Hillcrest Village
Bathurst Manor, Wilson Heights, Downsview North
Thorncliffe Park
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Scarborough Village
Fairview, Henry Farm, Oriole
Northwood Park, York University
East Toronto, Broadview North (Old East York)
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
Kennedy Park, Ionview, East Birchmo

In [103]:
print(toronto_venues.shape)
toronto_venues.head()

(2154, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
2,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
3,Victoria Village,43.725882,-79.315572,Portugril,43.725819,-79.312785,Portuguese Restaurant
4,Victoria Village,43.725882,-79.315572,Tim Hortons,43.725517,-79.313103,Coffee Shop


#### Finding number of venues in a neighbourhood

In [104]:
toronto_venues.groupby("Neighborhood").count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,5,5,5,5,5,5
"Alderwood, Long Branch",8,8,8,8,8,8
"Bathurst Manor, Wilson Heights, Downsview North",21,21,21,21,21,21
Bayview Village,4,4,4,4,4,4
"Bedford Park, Lawrence Manor East",27,27,27,27,27,27
...,...,...,...,...,...,...
"Willowdale, Willowdale East",34,34,34,34,34,34
"Willowdale, Willowdale West",6,6,6,6,6,6
Woburn,4,4,4,4,4,4
Woodbine Heights,7,7,7,7,7,7


#### finding out number of unique categories

In [105]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 267 uniques categories.


In [117]:
toronto_onehot=pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")
toronto_onehot.drop("Neighborhood", axis=1, inplace=True)
toronto_onehot['Neighborhood']=toronto_venues['Neighborhood']

fixed_columns=[toronto_onehot.columns[-1]]+list(toronto_onehot.columns[:-1])
toronto_onehot=toronto_onehot[fixed_columns]
toronto_onehot

Unnamed: 0,Neighborhood,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2149,"Mimico NW, The Queensway West, South of Bloor,...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2150,"Mimico NW, The Queensway West, South of Bloor,...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2151,"Mimico NW, The Queensway West, South of Bloor,...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2152,"Mimico NW, The Queensway West, South of Bloor,...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [118]:
toronto_grouped=toronto_onehot.groupby("Neighborhood").mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
1,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
2,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
3,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
4,"Bedford Park, Lawrence Manor East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
90,"Willowdale, Willowdale East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0
91,"Willowdale, Willowdale West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
92,Woburn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
93,Woodbine Heights,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0


In [120]:
toronto_grouped.shape

(95, 267)

#### Let's find each neighbourhood's top 5 venues

In [121]:
num_top_venues=5

for hood in toronto_grouped['Neighborhood']:
    print("---"+hood+"---")
    temp=toronto_grouped[toronto_grouped["Neighborhood"]==hood].T.reset_index()
    temp.columns=['venue','freq']
    temp=temp.iloc[1:]
    temp['freq']=temp['freq'].astype(float)
    temp['freq']=temp.round({'freq':2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

---Agincourt---
             venue             freq
0      Yoga Studio      Yoga Studio
1    Women's Store    Women's Store
2      Wings Joint      Wings Joint
3         Wine Bar         Wine Bar
4  Warehouse Store  Warehouse Store


---Alderwood, Long Branch---
             venue             freq
0      Yoga Studio      Yoga Studio
1    Women's Store    Women's Store
2      Wings Joint      Wings Joint
3         Wine Bar         Wine Bar
4  Warehouse Store  Warehouse Store


---Bathurst Manor, Wilson Heights, Downsview North---
             venue             freq
0      Yoga Studio      Yoga Studio
1    Women's Store    Women's Store
2      Wings Joint      Wings Joint
3         Wine Bar         Wine Bar
4  Warehouse Store  Warehouse Store


---Bayview Village---
             venue             freq
0      Yoga Studio      Yoga Studio
1    Women's Store    Women's Store
2      Wings Joint      Wings Joint
3         Wine Bar         Wine Bar
4  Warehouse Store  Warehouse Store


---Bedf

             venue             freq
0      Yoga Studio      Yoga Studio
1    Women's Store    Women's Store
2      Wings Joint      Wings Joint
3         Wine Bar         Wine Bar
4  Warehouse Store  Warehouse Store


---Humewood-Cedarvale---
             venue             freq
0      Yoga Studio      Yoga Studio
1    Women's Store    Women's Store
2      Wings Joint      Wings Joint
3         Wine Bar         Wine Bar
4  Warehouse Store  Warehouse Store


---India Bazaar, The Beaches West---
             venue             freq
0      Yoga Studio      Yoga Studio
1    Women's Store    Women's Store
2      Wings Joint      Wings Joint
3         Wine Bar         Wine Bar
4  Warehouse Store  Warehouse Store


---Kennedy Park, Ionview, East Birchmount Park---
             venue             freq
0      Yoga Studio      Yoga Studio
1    Women's Store    Women's Store
2      Wings Joint      Wings Joint
3         Wine Bar         Wine Bar
4  Warehouse Store  Warehouse Store


---Kensington Ma

             venue             freq
0      Yoga Studio      Yoga Studio
1    Women's Store    Women's Store
2      Wings Joint      Wings Joint
3         Wine Bar         Wine Bar
4  Warehouse Store  Warehouse Store


---Studio District---
             venue             freq
0      Yoga Studio      Yoga Studio
1    Women's Store    Women's Store
2      Wings Joint      Wings Joint
3         Wine Bar         Wine Bar
4  Warehouse Store  Warehouse Store


---Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park---
             venue             freq
0      Yoga Studio      Yoga Studio
1    Women's Store    Women's Store
2      Wings Joint      Wings Joint
3         Wine Bar         Wine Bar
4  Warehouse Store  Warehouse Store


---The Annex, North Midtown, Yorkville---
             venue             freq
0      Yoga Studio      Yoga Studio
1    Women's Store    Women's Store
2      Wings Joint      Wings Joint
3         Wine Bar         Wine Bar
4  Warehouse Store  Warehouse 

In [122]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

#### Creating dataframe to display top 10 venues in a neighborhood

In [157]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']


columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))


neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Lounge,Latin American Restaurant,Clothing Store,Skating Rink,Breakfast Spot,Eastern European Restaurant,Electronics Store,Dumpling Restaurant,Dessert Shop,Drugstore
1,"Alderwood, Long Branch",Pizza Place,Gym,Athletics & Sports,Pub,Sandwich Place,Skating Rink,Coffee Shop,Airport Service,Airport Terminal,Ethiopian Restaurant
2,"Bathurst Manor, Wilson Heights, Downsview North",Coffee Shop,Bank,Mobile Phone Shop,Fried Chicken Joint,Bridal Shop,Diner,Sandwich Place,Deli / Bodega,Restaurant,Middle Eastern Restaurant
3,Bayview Village,Café,Bank,Japanese Restaurant,Chinese Restaurant,Department Store,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run
4,"Bedford Park, Lawrence Manor East",Restaurant,Sandwich Place,Coffee Shop,Thai Restaurant,Italian Restaurant,Indian Restaurant,Fast Food Restaurant,Japanese Restaurant,Liquor Store,Juice Bar


#### Let's cluster neighbourhoods

In [161]:
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)


kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10].dtype 

dtype('int32')

In [159]:
df.rename(columns={"Neighbourhood":"Neighborhood"}, inplace=True)
df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


Unnamed: 0_level_0,Borough,Neighborhood,Latitude,Longitude
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
M3A,North York,Parkwoods,43.753259,-79.329656
M4A,North York,Victoria Village,43.725882,-79.315572
M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
...,...,...,...,...
M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558
M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509


#### Adding clustering labels to the dataframe

In [167]:
#neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_.astype('int32'))
neighborhoods_venues_sorted["Cluster Labels"].dtype
toronto_merged = df

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')
print(toronto_merged.shape)

toronto_merged.head()

(103, 15)


Unnamed: 0_level_0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
M3A,North York,Parkwoods,43.753259,-79.329656,2.0,Park,Food & Drink Shop,Yoga Studio,Dog Run,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Doner Restaurant
M4A,North York,Victoria Village,43.725882,-79.315572,0.0,French Restaurant,Pizza Place,Coffee Shop,Portuguese Restaurant,Hockey Arena,Distribution Center,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant
M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,1.0,Coffee Shop,Bakery,Pub,Park,Theater,Breakfast Spot,Café,Yoga Studio,Shoe Store,Restaurant
M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,1.0,Clothing Store,Accessories Store,Arts & Crafts Store,Furniture / Home Store,Event Space,Miscellaneous Shop,Coffee Shop,Boutique,Women's Store,Vietnamese Restaurant
M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,1.0,Coffee Shop,Diner,Yoga Studio,Sandwich Place,Restaurant,Park,Mexican Restaurant,Hobby Shop,Fried Chicken Joint,Distribution Center


#### Removing Non-numerical values in Cluster Labels

In [176]:
toronto_merged = toronto_merged[~toronto_merged['Cluster Labels'].isnull()]
toronto_merged.shape

(99, 15)

In [178]:
toronto_merged.head()

Unnamed: 0_level_0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
M3A,North York,Parkwoods,43.753259,-79.329656,2.0,Park,Food & Drink Shop,Yoga Studio,Dog Run,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Doner Restaurant
M4A,North York,Victoria Village,43.725882,-79.315572,0.0,French Restaurant,Pizza Place,Coffee Shop,Portuguese Restaurant,Hockey Arena,Distribution Center,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant
M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,1.0,Coffee Shop,Bakery,Pub,Park,Theater,Breakfast Spot,Café,Yoga Studio,Shoe Store,Restaurant
M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,1.0,Clothing Store,Accessories Store,Arts & Crafts Store,Furniture / Home Store,Event Space,Miscellaneous Shop,Coffee Shop,Boutique,Women's Store,Vietnamese Restaurant
M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,1.0,Coffee Shop,Diner,Yoga Studio,Sandwich Place,Restaurant,Park,Mexican Restaurant,Hobby Shop,Fried Chicken Joint,Distribution Center


#### Visualizing resulting clusters

In [179]:
latitude=43.6534817
longitude=-79.3839347

map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        fill=True,
        color=rainbow[int(cluster)-1],
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters