# Peer-graded Assignment: Segmenting and Clustering Neighborhoods in Toronto (Part 2)

### Importing all the libraries and dependencies needed

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geopy-1.22.0               |     pyh9f0ad1d_0          63 KB  conda-forge
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    ------------------------------------------------------------
                                           Total:          97 KB

The following NEW packages will be INSTALLED:

    geographiclib: 1.50-py_0           conda-forge
    geopy:         1.22.0-pyh9f0ad1d_0 conda-forge


Downloading and Extracting Packages
geopy-1.22.0         | 63 KB     | ##################################### | 100% 
geographiclib-1.50   | 34 KB     | ##################################### | 100% 
Preparing transaction: done
Verifying transaction: done
Executing transaction: done

# Load the coordinates from the csv file link

In [4]:
coordinates = pd.read_csv("http://cocl.us/Geospatial_data") #Not using the Geocoder package to obtain the coordinates, using the CSV file given instead
coordinates.head(12)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


### Next steps are to obtain the results from Part 1 of the assignment

In [5]:
from bs4 import BeautifulSoup

In [6]:
page = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")
soup = BeautifulSoup(page.content, 'html.parser')

In [7]:
table = soup.find('tbody')
rows = table.select('tr')
row = [r.get_text() for r in rows]

In [8]:
df = pd.DataFrame(row)
df1 = df[0].str.split('\n', expand=True)
df2 = df1.rename(columns=df1.iloc[0])
df3 = df2.drop(df2.index[0])
df3.head()

Unnamed: 0,Unnamed: 1,Postal Code,Unnamed: 3,Borough,Unnamed: 5,Neighborhood,Unnamed: 7
1,,M1A,,Not assigned,,Not assigned,
2,,M2A,,Not assigned,,Not assigned,
3,,M3A,,North York,,Parkwoods,
4,,M4A,,North York,,Victoria Village,
5,,M5A,,Downtown Toronto,,"Regent Park, Harbourfront",


In [9]:
df4 = df3[df3.Borough != 'Not assigned'].reset_index(drop=True)
df4.head(10)

Unnamed: 0,Unnamed: 1,Postal Code,Unnamed: 3,Borough,Unnamed: 5,Neighborhood,Unnamed: 7
0,,M3A,,North York,,Parkwoods,
1,,M4A,,North York,,Victoria Village,
2,,M5A,,Downtown Toronto,,"Regent Park, Harbourfront",
3,,M6A,,North York,,"Lawrence Manor, Lawrence Heights",
4,,M7A,,Downtown Toronto,,"Queen's Park, Ontario Provincial Government",
5,,M9A,,Etobicoke,,"Islington Avenue, Humber Valley Village",
6,,M1B,,Scarborough,,"Malvern, Rouge",
7,,M3B,,North York,,Don Mills,
8,,M4B,,East York,,"Parkview Hill, Woodbine Gardens",
9,,M5B,,Downtown Toronto,,"Garden District, Ryerson",


In [10]:
df5 = df4.groupby(['Postal Code', 'Borough'], sort = False).agg(','.join)
df5.reset_index(inplace = True)
df5.head(10)

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


In [12]:
# for Neighborhood="Not assigned", make the value the same as Borough
for index, row in df5.iterrows():
    if row["Neighborhood"] == "Not assigned":
        row["Neighborhood"] = row["Borough"]
        

In [13]:
df5.head(12)

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


### Merge the two dataframes on a new one using 'Postal Code' column as reference for both of them

In [14]:
toronto_data = pd.merge(df5,coordinates,on='Postal Code')
toronto_data

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


# Part 3- Exploring and Clustering Neighborhoods in Toronto

In [17]:
# To determine the coordinates of Toronto
address = 'Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of Toronto are 43.6534817, -79.3839347.


In [22]:
# How many boroughs and neighborhoods are in the dataframe
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(toronto_data['Borough'].unique()),
        toronto_data.shape[0]
    )
)

The dataframe has 10 boroughs and 103 neighborhoods.


In [19]:
# create map of Toronto using latitude and longitude values, with markers for the neighborhoods and boroughs
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(toronto_data['Latitude'], toronto_data['Longitude'], toronto_data['Borough'], toronto_data['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

### Utilizing the Foursquare API to explore the neighborhoods and segment them

In [21]:
CLIENT_ID = 'J2LC5YTQ2GI5HDUOQGV0YQT05MDDSCLMHD0JQBBO0VWKVJ0G' # my Foursquare ID
CLIENT_SECRET = '0AUNUN2XKBTULMWLE240ETUOROE3F3RA4YXBDDQYT5DP4FYO' # my Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your Foursquare API credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your Foursquare API credentails:
CLIENT_ID: J2LC5YTQ2GI5HDUOQGV0YQT05MDDSCLMHD0JQBBO0VWKVJ0G
CLIENT_SECRET:0AUNUN2XKBTULMWLE240ETUOROE3F3RA4YXBDDQYT5DP4FYO


### Let's create a function to repeat the same process to all the neighborhoods in Toronto

In [23]:
LIMIT=30
def getNearbyVenues(names, latitudes, longitudes, radius=100):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    return(nearby_venues)

### Now write the code to run the above function on each neighborhood and create a new dataframe called toronto_venues

In [24]:

toronto_venues = getNearbyVenues(names=toronto_data['Neighborhood'],
                                   latitudes=toronto_data['Latitude'],
                                   longitudes=toronto_data['Longitude']
                                  )

Parkwoods
Victoria Village
Regent Park, Harbourfront
Lawrence Manor, Lawrence Heights
Queen's Park, Ontario Provincial Government
Islington Avenue, Humber Valley Village
Malvern, Rouge
Don Mills
Parkview Hill, Woodbine Gardens
Garden District, Ryerson
Glencairn
West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale
Rouge Hill, Port Union, Highland Creek
Don Mills
Woodbine Heights
St. James Town
Humewood-Cedarvale
Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood
Guildwood, Morningside, West Hill
The Beaches
Berczy Park
Caledonia-Fairbanks
Woburn
Leaside
Central Bay Street
Christie
Cedarbrae
Hillcrest Village
Bathurst Manor, Wilson Heights, Downsview North
Thorncliffe Park
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Scarborough Village
Fairview, Henry Farm, Oriole
Northwood Park, York University
East Toronto, Broadview North (Old East York)
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
Kennedy Park, Ionview, East Birchmo

### Check the size of the dataframe

In [25]:

print(toronto_venues.shape)
toronto_venues.head()

(138, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Regent Park, Harbourfront",43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa
1,"Regent Park, Harbourfront",43.65426,-79.360636,Morning Glory Cafe,43.653947,-79.361149,Breakfast Spot
2,"Regent Park, Harbourfront",43.65426,-79.360636,Jane Roos Gallery,43.653962,-79.36109,Art Gallery
3,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,Roots,43.718214,-79.463893,Boutique
4,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,Pinstripe,43.719083,-79.464953,Men's Store


#### Let's check how many venues were returned for each neighborhood

In [26]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Bathurst Manor, Wilson Heights, Downsview North",3,3,3,3,3,3
"Bedford Park, Lawrence Manor East",4,4,4,4,4,4
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",1,1,1,1,1,1
Canada Post Gateway Processing Centre,1,1,1,1,1,1
Central Bay Street,4,4,4,4,4,4
Christie,1,1,1,1,1,1
"Commerce Court, Victoria Hotel",16,16,16,16,16,16
Davisville,10,10,10,10,10,10
Don Mills,1,1,1,1,1,1
"Fairview, Henry Farm, Oriole",2,2,2,2,2,2


### Let's find out how many unique categories can be curated from all the returned venues

In [27]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 77 uniques categories.


# Analysing each Neighborhood

In [30]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Neighborhood,Accessories Store,American Restaurant,Art Gallery,Asian Restaurant,Astrologer,Bakery,Bank,Bar,Beer Store,Bookstore,Boutique,Breakfast Spot,Brewery,Building,Burger Joint,Burrito Place,Café,Chinese Restaurant,Cocktail Bar,Coffee Shop,College Gym,Concert Hall,Cosmetics Shop,Cupcake Shop,Deli / Bodega,Dessert Shop,Diner,Farmers Market,Fast Food Restaurant,Flower Shop,Food Court,Fried Chicken Joint,Furniture / Home Store,Garden,Gastropub,Gay Bar,Gift Shop,Gluten-free Restaurant,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Hobby Shop,Hostel,Hotel,Housing Development,Indian Restaurant,Italian Restaurant,Japanese Restaurant,Liquor Store,Men's Store,Middle Eastern Restaurant,Nightclub,Park,Performing Arts Venue,Pharmacy,Pizza Place,Playground,Pub,Record Shop,Restaurant,Salad Place,Sandwich Place,Seafood Restaurant,Soup Place,Spa,Supermarket,Sushi Restaurant,Taco Place,Tea Room,Thai Restaurant,Theater,Thrift / Vintage Store,Toy / Game Store,Trail,Vegetarian / Vegan Restaurant,Yoga Studio
0,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
1,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,"Regent Park, Harbourfront",0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,"Lawrence Manor, Lawrence Heights",0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,"Lawrence Manor, Lawrence Heights",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [31]:
print('The above dataframe'' size is: ',toronto_onehot.shape)

The above dataframe size is:  (138, 78)


### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [32]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped.head()

Unnamed: 0,Neighborhood,Accessories Store,American Restaurant,Art Gallery,Asian Restaurant,Astrologer,Bakery,Bank,Bar,Beer Store,Bookstore,Boutique,Breakfast Spot,Brewery,Building,Burger Joint,Burrito Place,Café,Chinese Restaurant,Cocktail Bar,Coffee Shop,College Gym,Concert Hall,Cosmetics Shop,Cupcake Shop,Deli / Bodega,Dessert Shop,Diner,Farmers Market,Fast Food Restaurant,Flower Shop,Food Court,Fried Chicken Joint,Furniture / Home Store,Garden,Gastropub,Gay Bar,Gift Shop,Gluten-free Restaurant,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Hobby Shop,Hostel,Hotel,Housing Development,Indian Restaurant,Italian Restaurant,Japanese Restaurant,Liquor Store,Men's Store,Middle Eastern Restaurant,Nightclub,Park,Performing Arts Venue,Pharmacy,Pizza Place,Playground,Pub,Record Shop,Restaurant,Salad Place,Sandwich Place,Seafood Restaurant,Soup Place,Spa,Supermarket,Sushi Restaurant,Taco Place,Tea Room,Thai Restaurant,Theater,Thrift / Vintage Store,Toy / Game Store,Trail,Vegetarian / Vegan Restaurant,Yoga Studio
0,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Bedford Park, Lawrence Manor East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"CN Tower, King and Spadina, Railway Lands, Har...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Canada Post Gateway Processing Centre,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Let's print each neighborhood along with the top 5 most common venues

In [33]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Bathurst Manor, Wilson Heights, Downsview North----
                 venue  freq
0          Coffee Shop  0.33
1        Deli / Bodega  0.33
2  Fried Chicken Joint  0.33
3          Men's Store  0.00
4          Pizza Place  0.00


----Bedford Park, Lawrence Manor East----
                venue  freq
0         Coffee Shop  0.25
1          Hobby Shop  0.25
2  Italian Restaurant  0.25
3    Sushi Restaurant  0.25
4              Hostel  0.00


----CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport----
                   venue  freq
0  Performing Arts Venue   1.0
1      Accessories Store   0.0
2           Liquor Store   0.0
3            Pizza Place   0.0
4               Pharmacy   0.0


----Canada Post Gateway Processing Centre----
                   venue  freq
0            Coffee Shop   1.0
1           Liquor Store   0.0
2            Pizza Place   0.0
3               Pharmacy   0.0
4  Performing Arts Venue   0.0


----Central Bay Str

### Putting this into a pandas dataframe

In [34]:
#First to sort the venues in descending order
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

Now let's create the new dataframe and display the top 10 venues for each neighborhood

In [35]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)
neighborhoods_venues_sorted.head(10)

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Bathurst Manor, Wilson Heights, Downsview North",Coffee Shop,Fried Chicken Joint,Deli / Bodega,Farmers Market,College Gym,Concert Hall,Cosmetics Shop,Cupcake Shop,Dessert Shop,Diner
1,"Bedford Park, Lawrence Manor East",Italian Restaurant,Hobby Shop,Sushi Restaurant,Coffee Shop,Yoga Studio,Concert Hall,Cosmetics Shop,Cupcake Shop,Deli / Bodega,Dessert Shop
2,"CN Tower, King and Spadina, Railway Lands, Har...",Performing Arts Venue,Yoga Studio,Farmers Market,Concert Hall,Cosmetics Shop,Cupcake Shop,Deli / Bodega,Dessert Shop,Diner,Fast Food Restaurant
3,Canada Post Gateway Processing Centre,Coffee Shop,Farmers Market,College Gym,Concert Hall,Cosmetics Shop,Cupcake Shop,Deli / Bodega,Dessert Shop,Diner,Yoga Studio
4,Central Bay Street,Coffee Shop,Pharmacy,Sandwich Place,Cocktail Bar,College Gym,Concert Hall,Cosmetics Shop,Cupcake Shop,Deli / Bodega,Dessert Shop
5,Christie,Nightclub,Yoga Studio,Farmers Market,Concert Hall,Cosmetics Shop,Cupcake Shop,Deli / Bodega,Dessert Shop,Diner,Fast Food Restaurant
6,"Commerce Court, Victoria Hotel",Seafood Restaurant,Bookstore,Café,Burrito Place,Deli / Bodega,Pub,Fast Food Restaurant,Salad Place,Sandwich Place,Soup Place
7,Davisville,Café,Dessert Shop,Flower Shop,Italian Restaurant,Seafood Restaurant,Sushi Restaurant,Coffee Shop,Toy / Game Store,Food Court,Fast Food Restaurant
8,Don Mills,Restaurant,Yoga Studio,Cocktail Bar,College Gym,Concert Hall,Cosmetics Shop,Cupcake Shop,Deli / Bodega,Dessert Shop,Diner
9,"Fairview, Henry Farm, Oriole",Theater,Bank,Yoga Studio,Fast Food Restaurant,Concert Hall,Cosmetics Shop,Cupcake Shop,Deli / Bodega,Dessert Shop,Diner


# Clustering Neighborhoods 

Running k-means to cluster the neighborhood into 5 clusters

In [46]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 2, 2, 0, 0, 2, 2, 2, 2, 2], dtype=int32)

Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

In [49]:
toronto_merged = toronto_data

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood', how = 'right') 

toronto_merged.head(10) # check the last columns!

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,2,Spa,Art Gallery,Breakfast Spot,Yoga Studio,Concert Hall,Cosmetics Shop,Cupcake Shop,Deli / Bodega,Dessert Shop,Diner
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,2,Accessories Store,Men's Store,Boutique,Coffee Shop,Concert Hall,Cosmetics Shop,Cupcake Shop,Deli / Bodega,Dessert Shop,Diner
7,M3B,North York,Don Mills,43.745906,-79.352188,2,Restaurant,Yoga Studio,Cocktail Bar,College Gym,Concert Hall,Cosmetics Shop,Cupcake Shop,Deli / Bodega,Dessert Shop,Diner
13,M3C,North York,Don Mills,43.7259,-79.340923,2,Restaurant,Yoga Studio,Cocktail Bar,College Gym,Concert Hall,Cosmetics Shop,Cupcake Shop,Deli / Bodega,Dessert Shop,Diner
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937,1,Park,Yoga Studio,Farmers Market,Concert Hall,Cosmetics Shop,Cupcake Shop,Deli / Bodega,Dessert Shop,Diner,Fast Food Restaurant
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,0,Art Gallery,Coffee Shop,Yoga Studio,Farmers Market,Concert Hall,Cosmetics Shop,Cupcake Shop,Deli / Bodega,Dessert Shop,Diner
10,M6B,North York,Glencairn,43.709577,-79.445073,3,Italian Restaurant,Astrologer,Yoga Studio,Concert Hall,Cosmetics Shop,Cupcake Shop,Deli / Bodega,Dessert Shop,Diner,Farmers Market
15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,2,Japanese Restaurant,Coffee Shop,Hostel,Performing Arts Venue,Breakfast Spot,Italian Restaurant,Diner,Gay Bar,Flower Shop,Fast Food Restaurant
19,M4E,East Toronto,The Beaches,43.676357,-79.293031,4,Trail,Yoga Studio,Farmers Market,College Gym,Concert Hall,Cosmetics Shop,Cupcake Shop,Deli / Bodega,Dessert Shop,Diner
24,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383,0,Coffee Shop,Pharmacy,Sandwich Place,Cocktail Bar,College Gym,Concert Hall,Cosmetics Shop,Cupcake Shop,Deli / Bodega,Dessert Shop


Let's visualize the resulting clusters

In [50]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# Examine Clusters

#### Cluster 1

In [51]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
9,Downtown Toronto,0,Art Gallery,Coffee Shop,Yoga Studio,Farmers Market,Concert Hall,Cosmetics Shop,Cupcake Shop,Deli / Bodega,Dessert Shop,Diner
24,Downtown Toronto,0,Coffee Shop,Pharmacy,Sandwich Place,Cocktail Bar,College Gym,Concert Hall,Cosmetics Shop,Cupcake Shop,Deli / Bodega,Dessert Shop
28,North York,0,Coffee Shop,Fried Chicken Joint,Deli / Bodega,Farmers Market,College Gym,Concert Hall,Cosmetics Shop,Cupcake Shop,Dessert Shop,Diner
36,Downtown Toronto,0,Gym,Coffee Shop,Yoga Studio,Farmers Market,Concert Hall,Cosmetics Shop,Cupcake Shop,Deli / Bodega,Dessert Shop,Diner
42,Downtown Toronto,0,Coffee Shop,Restaurant,Gym,Deli / Bodega,Diner,College Gym,Concert Hall,Cosmetics Shop,Cupcake Shop,Dessert Shop
70,Etobicoke,0,Coffee Shop,Pizza Place,Farmers Market,College Gym,Concert Hall,Cosmetics Shop,Cupcake Shop,Deli / Bodega,Dessert Shop,Diner
76,Mississauga,0,Coffee Shop,Farmers Market,College Gym,Concert Hall,Cosmetics Shop,Cupcake Shop,Deli / Bodega,Dessert Shop,Diner,Yoga Studio


#### Cluster 2

In [52]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,East York,1,Park,Yoga Studio,Farmers Market,Concert Hall,Cosmetics Shop,Cupcake Shop,Deli / Bodega,Dessert Shop,Diner,Fast Food Restaurant
83,Central Toronto,1,Park,Yoga Studio,Farmers Market,Concert Hall,Cosmetics Shop,Cupcake Shop,Deli / Bodega,Dessert Shop,Diner,Fast Food Restaurant
91,Downtown Toronto,1,Park,Yoga Studio,Farmers Market,Concert Hall,Cosmetics Shop,Cupcake Shop,Deli / Bodega,Dessert Shop,Diner,Fast Food Restaurant


#### Cluster 3

In [53]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Downtown Toronto,2,Spa,Art Gallery,Breakfast Spot,Yoga Studio,Concert Hall,Cosmetics Shop,Cupcake Shop,Deli / Bodega,Dessert Shop,Diner
3,North York,2,Accessories Store,Men's Store,Boutique,Coffee Shop,Concert Hall,Cosmetics Shop,Cupcake Shop,Deli / Bodega,Dessert Shop,Diner
7,North York,2,Restaurant,Yoga Studio,Cocktail Bar,College Gym,Concert Hall,Cosmetics Shop,Cupcake Shop,Deli / Bodega,Dessert Shop,Diner
13,North York,2,Restaurant,Yoga Studio,Cocktail Bar,College Gym,Concert Hall,Cosmetics Shop,Cupcake Shop,Deli / Bodega,Dessert Shop,Diner
15,Downtown Toronto,2,Japanese Restaurant,Coffee Shop,Hostel,Performing Arts Venue,Breakfast Spot,Italian Restaurant,Diner,Gay Bar,Flower Shop,Fast Food Restaurant
25,Downtown Toronto,2,Nightclub,Yoga Studio,Farmers Market,Concert Hall,Cosmetics Shop,Cupcake Shop,Deli / Bodega,Dessert Shop,Diner,Fast Food Restaurant
29,East York,2,Indian Restaurant,Middle Eastern Restaurant,Sandwich Place,Housing Development,Coffee Shop,Diner,Concert Hall,Cosmetics Shop,Cupcake Shop,Deli / Bodega
30,Downtown Toronto,2,Greek Restaurant,Sushi Restaurant,Vegetarian / Vegan Restaurant,Food Court,Japanese Restaurant,Concert Hall,Coffee Shop,Restaurant,Garden,Taco Place
32,Scarborough,2,Playground,Farmers Market,College Gym,Concert Hall,Cosmetics Shop,Cupcake Shop,Deli / Bodega,Dessert Shop,Diner,Fast Food Restaurant
33,North York,2,Theater,Bank,Yoga Studio,Fast Food Restaurant,Concert Hall,Cosmetics Shop,Cupcake Shop,Deli / Bodega,Dessert Shop,Diner


#### Cluster 4

In [54]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
10,North York,3,Italian Restaurant,Astrologer,Yoga Studio,Concert Hall,Cosmetics Shop,Cupcake Shop,Deli / Bodega,Dessert Shop,Diner,Farmers Market
96,Downtown Toronto,3,Italian Restaurant,Yoga Studio,Concert Hall,Cosmetics Shop,Cupcake Shop,Deli / Bodega,Dessert Shop,Diner,Farmers Market,Fast Food Restaurant


#### Cluster 5

In [55]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
19,East Toronto,4,Trail,Yoga Studio,Farmers Market,College Gym,Concert Hall,Cosmetics Shop,Cupcake Shop,Deli / Bodega,Dessert Shop,Diner
