# Segmenting and Clustering Neighborhoods in Toronto PART 3

##### Scrape the Wikipedia table of Toronto postal codes found at https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

#Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

#import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    openssl-1.1.1c             |       h516909a_0         2.1 MB  conda-forge
    geographiclib-1.49         |             py_0          32 KB  conda-forge
    certifi-2019.6.16          |           py36_1         149 KB  conda-forge
    ca-certificates-2019.6.16  |       hecc5488_0         145 KB  conda-forge
    geopy-1.20.0               |             py_0          57 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         2.5 MB

The following NEW packages will be INSTALLED:

    geographiclib:   1.49-py_0         conda-forge
    geopy:           1.20.0-py_0       conda-forge

The following packages will be UPDATED:

    ca-

In [2]:
import requests
from bs4 import BeautifulSoup

In [3]:
# Download the postal-code page. The timeout stops the cell hanging on a stalled
# connection, and raise_for_status() stops us parsing an HTTP error page.
response = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', timeout=30)
response.raise_for_status()
pull = BeautifulSoup(response.text, 'lxml')

toronto_table = pull.find('table', {'class': 'wikitable sortable'})
if toronto_table is None:
    # find() returns None when nothing matches; fail with a readable message
    # instead of an AttributeError on the next line.
    raise ValueError('No "wikitable sortable" table found; the page layout may have changed.')
toronto_rows = toronto_table.find_all('tr')

# One list of cell texts per table row: the row text is split on newlines and
# the first and last pieces are discarded.
neighborhoods = [row.text.split('\n')[1:-1] for row in toronto_rows]

##### The dataframe will consist of three columns: Postcode, Borough, and Neighborhood

In [4]:
# Build the frame from every scraped row except the first
# (presumably the table's header row -- confirm against the page).
df_n = pd.DataFrame(
    data=neighborhoods[1:],
    columns=['Postcode', 'Borough', 'Neighborhood'],
)

##### Only process the cells that have an assigned borough. Ignore cells with a borough that is Not assigned.

In [5]:
# Flag rows whose borough text contains 'Not assigned', then keep the rest.
borough_not_assigned = df_n['Borough'].str.contains('Not assigned')
df_nB = df_n[~borough_not_assigned]

##### More than one neighborhood can exist in one postal code area. Rows that share a postal code and borough will be combined into one row, with the neighborhoods separated by a comma.

In [6]:
# Collapse rows sharing a (Postcode, Borough) pair, joining the remaining
# string column(s) with ', ' and keeping the original row order.
df_nN = (
    df_nB
    .groupby(['Postcode', 'Borough'], sort=False)
    .agg(', '.join)
    .reset_index()
)

###### If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough.

In [7]:
# Where the neighborhood is 'Not assigned', fall back to the borough name.
neighborhood_not_assigned = df_nN['Neighborhood'] == 'Not assigned'
df_nN.loc[neighborhood_not_assigned, 'Neighborhood'] = df_nN['Borough']

In [8]:
# (rows, columns) of the cleaned frame: one row per Postcode/Borough group.
df_nN.shape

(103, 3)

###### Use link to a csv file that has the geographical coordinates of each postal code: http://cocl.us/Geospatial_data

In [9]:
# Load the per-postal-code coordinates and rename the key column so it
# matches the 'Postcode' column used by the neighborhood frame.
df_pull_geo = (
    pd.read_csv('http://cocl.us/Geospatial_data')
    .rename(columns={'Postal Code': 'Postcode'})
)

###### Merge the coordinates from the csv file into the neighborhood dataframe.

In [10]:
# Rebuild the aggregated neighborhood frame from df_nB so this cell can be
# re-run on its own, then attach the coordinates.
df_nN = (
    df_nB
    .groupby(['Postcode', 'Borough'], sort=False)
    .agg(', '.join)
    .reset_index()
)
unassigned_rows = df_nN['Neighborhood'] == 'Not assigned'
df_nN.loc[unassigned_rows, 'Neighborhood'] = df_nN['Borough']

# pd.merge defaults to an inner join: postcodes without coordinates are dropped.
postcode_coordinates = df_pull_geo[['Postcode', 'Latitude', 'Longitude']]
df_next = pd.merge(df_nN, postcode_coordinates, on='Postcode')

##### Borough and Neighborhood totals for the city of Toronto

In [11]:
# Distinct boroughs, and the number of rows in the merged frame (the message
# reports the row count as the number of neighborhoods).
borough_total = len(df_next['Borough'].unique())
row_total = df_next.shape[0]
print('The dataframe has {} boroughs and {} neighborhoods.'.format(borough_total, row_total))

The dataframe has 11 boroughs and 103 neighborhoods.


In [12]:
address = 'Toronto, ON'

# Look up the city's coordinates; they are used to centre the maps below.
geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
if location is None:
    # geocode() returns None when the service finds no match; fail with a
    # clear message instead of an AttributeError on `.latitude`.
    raise ValueError('Could not geocode {!r}.'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


##### Create a map of Toronto with neighborhoods superimposed on top.

In [13]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# One blue circle per row of df_next, with a "<neighborhood>, <borough>" popup.
marker_fields = zip(df_next['Latitude'], df_next['Longitude'], df_next['Borough'], df_next['Neighborhood'])
for lat, lng, borough, neighborhood in marker_fields:
    label = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_toronto)

map_toronto

### Choose a borough in Toronto and explore its neighborhoods

In [14]:
# Note: despite its name, `chosen_neighborhood` is matched against the Borough column.
chosen_neighborhood = 'Central Toronto'
in_chosen_borough = df_next['Borough'] == chosen_neighborhood
chosen_data = df_next[in_chosen_borough].reset_index(drop=True)
chosen_data.head()

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879
1,M5N,Central Toronto,Roselawn,43.711695,-79.416936
2,M4P,Central Toronto,Davisville North,43.712751,-79.390197
3,M5P,Central Toronto,"Forest Hill North, Forest Hill West",43.696948,-79.411307
4,M4R,Central Toronto,North Toronto West,43.715383,-79.405678


In [15]:
address = chosen_neighborhood+', ON'

# Geocode the selected area so the next map can be centred on it.
geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
if location is None:
    # geocode() returns None when the service finds no match; fail with a
    # clear message instead of an AttributeError on `.latitude`.
    raise ValueError('Could not geocode {!r}.'.format(address))
latitude_chosen = location.latitude
longitude_chosen = location.longitude
# Report the place that was actually looked up (the message used to say "Toronto").
print('The geographical coordinates of {} are {}, {}.'.format(chosen_neighborhood, latitude_chosen, longitude_chosen))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [16]:
map_chosen = folium.Map(location=[latitude_chosen, longitude_chosen], zoom_start=11)

# One red circle per row of chosen_data, with a "<neighborhood>, <borough>" popup.
marker_fields = zip(chosen_data['Latitude'], chosen_data['Longitude'], chosen_data['Borough'], chosen_data['Neighborhood'])
for lat, lng, borough, neighborhood in marker_fields:
    label = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='red',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_chosen)

map_chosen

In [17]:
import os
from getpass import getpass

# Foursquare credentials are read from the environment, with an interactive
# prompt as fallback, so that secrets are never stored in the notebook.
# NOTE(review): the keys previously committed in this cell should be treated
# as leaked and revoked in the Foursquare developer console.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID') or getpass('Foursquare client ID: ')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET') or getpass('Foursquare client secret: ')
VERSION = '20180605' # Foursquare API version

# Name of the first area in the selected borough (explored in the next cells).
chosen_data.loc[0, 'Neighborhood']

'Lawrence Park'

In [18]:
# Pull name and coordinates of the first row of chosen_data.
neighborhood_name = chosen_data.at[0, 'Neighborhood']
neighborhood_latitude = chosen_data.at[0, 'Latitude']
neighborhood_longitude = chosen_data.at[0, 'Longitude']

coordinates_message = 'Latitude and longitude values of {} are {}, {}.'
print(coordinates_message.format(neighborhood_name, neighborhood_latitude, neighborhood_longitude))

Latitude and longitude values of Lawrence Park are 43.7280205, -79.3887901.


In [19]:
LIMIT = 100   # value sent as the request's `limit` parameter
radius = 500  # value sent as the request's `radius` parameter

# Fill the explore-endpoint template with credentials, API version,
# the area's coordinates, the radius and the limit.
explore_template = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'
url = explore_template.format(
    CLIENT_ID, CLIENT_SECRET, VERSION,
    neighborhood_latitude, neighborhood_longitude,
    radius, LIMIT,
)

In [20]:
# Bound the wait with a timeout and surface HTTP errors right away, so a failed
# request does not turn into a confusing KeyError when the payload is read later.
response = requests.get(url, timeout=30)
response.raise_for_status()
results = response.json()
results

{'meta': {'code': 200, 'requestId': '5d488a9fbf7dde002ce76454'},
  'headerLocation': 'Toronto',
  'headerFullLocation': 'Toronto',
  'headerLocationGranularity': 'city',
  'totalResults': 3,
  'suggestedBounds': {'ne': {'lat': 43.7325205045, 'lng': -79.3825744605273},
   'sw': {'lat': 43.7235204955, 'lng': -79.3950057394727}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '50e6da19e4b0d8a78a0e9794',
       'name': 'Lawrence Park Ravine',
       'location': {'address': '3055 Yonge Street',
        'crossStreet': 'Lawrence Avenue East',
        'lat': 43.72696303913755,
        'lng': -79.39438246708775,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.72696303913755,
          'lng': -79.39438246708775}],
        'distance': 465,
        'cc': 'CA',
  

In [21]:
# function that extracts the category of the venue
def get_category_type(row):
    """Return the name of the first category attached to a venue record.

    Parameters
    ----------
    row : mapping-like (e.g. dict or pandas Series)
        Must expose the category list under 'categories' or, failing that,
        under 'venue.categories'. Each category is expected to be a mapping
        with a 'name' entry.

    Returns
    -------
    The 'name' of the first category, or None when the category list is empty.

    Raises
    ------
    KeyError
        If neither 'categories' nor 'venue.categories' is present.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error
        # (including KeyboardInterrupt) now propagates instead of being hidden.
        categories_list = row['venue.categories']

    if len(categories_list) == 0:
        return None
    return categories_list[0]['name']

In [22]:
# Venue items of the first group in the API response.
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten the nested JSON records into a table

# keep only the name, category list and coordinates of each venue
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# replace each row's category list with the name of its first category (None if empty)
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# strip the dotted prefixes, leaving only the last path component of each column name
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

# plain-text preview followed by the number of venue rows
print(nearby_venues.head())
print("\n")
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

                             name   categories        lat        lng
0            Lawrence Park Ravine         Park  43.726963 -79.394382
1              Zodiac Swim School  Swim School  43.728532 -79.382860
2  TTC Bus #162 - Lawrence-Donway     Bus Line  43.728026 -79.382805


3 venues were returned by Foursquare.


### All Neighborhoods

In [23]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query the Foursquare explore endpoint once per location and tabulate the venues.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        Label and coordinates of each location; they are consumed in parallel.
    radius : int, default 500
        Value sent as the request's `radius` parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        The frame is empty (but has these columns) when nothing was found.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.

    Notes
    -----
    Reads the module-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT, and
    prints each location name as a progress indicator.
    """
    column_names = ['Neighborhood',
                    'Neighborhood Latitude',
                    'Neighborhood Longitude',
                    'Venue',
                    'Venue Latitude',
                    'Venue Longitude',
                    'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID,
            CLIENT_SECRET,
            VERSION,
            lat,
            lng,
            radius,
            LIMIT)

        # make the GET request; bound the wait and fail loudly on HTTP errors
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for v in results:
            venue = v['venue']
            categories = venue['categories']
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # a venue may carry no category at all; record None instead of crashing
                categories[0]['name'] if categories else None))

    # Passing the column names to the constructor also works when no venues
    # were collected (assigning `.columns` afterwards raised a length mismatch).
    return pd.DataFrame(venue_rows, columns=column_names)

# Collect venues around every row of chosen_data (one API request per row).
chosen_venues = getNearbyVenues(
    names=chosen_data['Neighborhood'],
    latitudes=chosen_data['Latitude'],
    longitudes=chosen_data['Longitude'],
)

Lawrence Park
Roselawn
Davisville North
Forest Hill North, Forest Hill West
North Toronto West
The Annex, North Midtown, Yorkville
Davisville
Moore Park, Summerhill East
Deer Park, Forest Hill SE, Rathnelly, South Hill, Summerhill West


In [24]:
# (venue rows, columns), followed by a preview of the first rows.
print(chosen_venues.shape)
chosen_venues.head()

(112, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Lawrence Park,43.72802,-79.38879,Lawrence Park Ravine,43.726963,-79.394382,Park
1,Lawrence Park,43.72802,-79.38879,Zodiac Swim School,43.728532,-79.38286,Swim School
2,Lawrence Park,43.72802,-79.38879,TTC Bus #162 - Lawrence-Donway,43.728026,-79.382805,Bus Line
3,Roselawn,43.711695,-79.416936,Rosalind's Garden Oasis,43.712189,-79.411978,Garden
4,Roselawn,43.711695,-79.416936,Menchie's St. Clair West,43.707664,-79.414301,Ice Cream Shop


In [25]:
# Non-null entry count of every column, per neighborhood label.
chosen_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Davisville,34,34,34,34,34,34
Davisville North,7,7,7,7,7,7
"Deer Park, Forest Hill SE, Rathnelly, South Hill, Summerhill West",15,15,15,15,15,15
"Forest Hill North, Forest Hill West",4,4,4,4,4,4
Lawrence Park,3,3,3,3,3,3
"Moore Park, Summerhill East",4,4,4,4,4,4
North Toronto West,19,19,19,19,19,19
Roselawn,2,2,2,2,2,2
"The Annex, North Midtown, Yorkville",24,24,24,24,24,24


In [26]:
# Number of distinct venue categories among the collected venues.
category_total = len(chosen_venues['Venue Category'].unique())
print('There are {} uniques categories.'.format(category_total))

There are 58 uniques categories.


### Analyze each neighborhood

In [27]:
# One indicator column per venue category (column name = category name).
chosen_onehot = pd.get_dummies(chosen_venues[['Venue Category']], prefix="", prefix_sep="")

# A venue category literally named "Neighborhood" would collide with the label
# column added below; rename it so it is neither overwritten nor mistaken for
# the label. This is a no-op when no such category exists.
chosen_onehot = chosen_onehot.rename(columns={'Neighborhood': 'Neighborhood (venue category)'})

# Add the neighborhood label directly as the first column. This replaces the
# "append, then move the last column to the front" step, which moved the wrong
# column whenever the assignment overwrote an existing one.
chosen_onehot.insert(0, 'Neighborhood', chosen_venues['Neighborhood'])

chosen_onehot.head()

Unnamed: 0,Neighborhood,American Restaurant,BBQ Joint,Bagel Shop,Breakfast Spot,Brewery,Burger Joint,Bus Line,Café,Chinese Restaurant,Clothing Store,Coffee Shop,Convenience Store,Cosmetics Shop,Dance Studio,Deli / Bodega,Dessert Shop,Diner,Farmers Market,Food & Drink Shop,Fried Chicken Joint,Furniture / Home Store,Garden,Gourmet Shop,Greek Restaurant,Gym,History Museum,Hotel,Ice Cream Shop,Indian Restaurant,Italian Restaurant,Japanese Restaurant,Jewelry Store,Jewish Restaurant,Light Rail Station,Liquor Store,Mexican Restaurant,Park,Pharmacy,Pizza Place,Playground,Pub,Restaurant,Salon / Barbershop,Sandwich Place,Seafood Restaurant,Spa,Sporting Goods Shop,Sports Bar,Summer Camp,Supermarket,Sushi Restaurant,Swim School,Thai Restaurant,Toy / Game Store,Trail,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Yoga Studio
0,Lawrence Park,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Lawrence Park,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
2,Lawrence Park,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Roselawn,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Roselawn,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [28]:
# (venue rows, 1 'Neighborhood' column + one column per venue category)
chosen_onehot.shape

(112, 59)

In [29]:
# Average the indicator columns within each neighborhood; each value becomes
# the share of that neighborhood's venues falling in the category.
chosen_grouped = chosen_onehot.groupby('Neighborhood', as_index=False).mean()
chosen_grouped

Unnamed: 0,Neighborhood,American Restaurant,BBQ Joint,Bagel Shop,Breakfast Spot,Brewery,Burger Joint,Bus Line,Café,Chinese Restaurant,Clothing Store,Coffee Shop,Convenience Store,Cosmetics Shop,Dance Studio,Deli / Bodega,Dessert Shop,Diner,Farmers Market,Food & Drink Shop,Fried Chicken Joint,Furniture / Home Store,Garden,Gourmet Shop,Greek Restaurant,Gym,History Museum,Hotel,Ice Cream Shop,Indian Restaurant,Italian Restaurant,Japanese Restaurant,Jewelry Store,Jewish Restaurant,Light Rail Station,Liquor Store,Mexican Restaurant,Park,Pharmacy,Pizza Place,Playground,Pub,Restaurant,Salon / Barbershop,Sandwich Place,Seafood Restaurant,Spa,Sporting Goods Shop,Sports Bar,Summer Camp,Supermarket,Sushi Restaurant,Swim School,Thai Restaurant,Toy / Game Store,Trail,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Yoga Studio
0,Davisville,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.058824,0.029412,0.0,0.058824,0.0,0.0,0.029412,0.029412,0.088235,0.029412,0.029412,0.0,0.029412,0.0,0.0,0.029412,0.029412,0.029412,0.0,0.0,0.0,0.029412,0.058824,0.029412,0.0,0.0,0.0,0.0,0.0,0.029412,0.029412,0.058824,0.0,0.0,0.029412,0.0,0.088235,0.029412,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.029412,0.029412,0.0,0.0,0.0,0.0
1,Davisville North,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Deer Park, Forest Hill SE, Rathnelly, South Hi...",0.066667,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.133333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.066667,0.0,0.0,0.0,0.066667,0.0,0.133333,0.066667,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.066667,0.066667,0.0,0.0,0.0,0.0,0.0,0.066667,0.0
3,"Forest Hill North, Forest Hill West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.25,0.0,0.0,0.0
4,Lawrence Park,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0
5,"Moore Park, Summerhill East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,North Toronto West,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.052632,0.052632,0.105263,0.0,0.052632,0.0,0.0,0.052632,0.052632,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.052632,0.052632,0.0,0.0,0.0,0.0,0.052632,0.052632,0.0,0.0,0.052632,0.105263,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632
7,Roselawn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"The Annex, North Midtown, Yorkville",0.041667,0.041667,0.0,0.0,0.0,0.041667,0.0,0.125,0.0,0.0,0.125,0.041667,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.041667,0.0,0.0,0.0,0.041667,0.0,0.041667,0.0,0.041667,0.041667,0.083333,0.0,0.041667,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0


In [30]:
# (neighborhood groups, 1 'Neighborhood' column + one column per venue category)
chosen_grouped.shape

(9, 59)

In [31]:
num_top_venues = 5

# For each neighborhood, print its most frequent venue categories.
for hood in chosen_grouped['Neighborhood']:
    print("----"+hood+"----")
    # Transpose the neighborhood's single row into (column name, value) pairs.
    profile = chosen_grouped[chosen_grouped['Neighborhood'] == hood].T.reset_index()
    profile.columns = ['venue','freq']
    # Drop the leading 'Neighborhood' entry so only category frequencies remain.
    profile = profile.iloc[1:].assign(freq=lambda frame: frame['freq'].astype(float))
    ranked = (
        profile
        .round({'freq': 2})
        .sort_values('freq', ascending=False)
        .reset_index(drop=True)
    )
    print(ranked.head(num_top_venues))
    print('\n')

----Davisville----
                venue  freq
0        Dessert Shop  0.09
1      Sandwich Place  0.09
2  Italian Restaurant  0.06
3         Coffee Shop  0.06
4         Pizza Place  0.06


----Davisville North----
            venue  freq
0  Sandwich Place  0.14
1  Breakfast Spot  0.14
2            Park  0.14
3           Hotel  0.14
4             Gym  0.14


----Deer Park, Forest Hill SE, Rathnelly, South Hill, Summerhill West----
                 venue  freq
0          Coffee Shop  0.13
1                  Pub  0.13
2  American Restaurant  0.07
3           Restaurant  0.07
4           Sports Bar  0.07


----Forest Hill North, Forest Hill West----
                 venue  freq
0        Jewelry Store  0.25
1                Trail  0.25
2     Sushi Restaurant  0.25
3                 Park  0.25
4  American Restaurant  0.00


----Lawrence Park----
                 venue  freq
0             Bus Line  0.33
1                 Park  0.33
2          Swim School  0.33
3  American Restaurant  0.00
4  

In [32]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the largest values in `row`, ignoring its first entry.

    `row` is a pandas Series whose first element is skipped (the callers pass
    a row whose first element is the neighborhood name). The remaining values
    are sorted in descending order and the index labels of the first
    `num_top_venues` of them are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# Build the column headers: '1st', '2nd', '3rd', then 'Nth' for the rest.
# An explicit bounds check replaces the former bare `except:`, which would
# have hidden any unrelated error raised while formatting.
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# create a new dataframe with one row per neighborhood
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = chosen_grouped['Neighborhood']

# fill every row with that neighborhood's top categories, most frequent first
for ind in np.arange(chosen_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(chosen_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Davisville,Sandwich Place,Dessert Shop,Coffee Shop,Café,Italian Restaurant,Sushi Restaurant,Pizza Place,Pharmacy,Park,Dance Studio
1,Davisville North,Hotel,Gym,Breakfast Spot,Clothing Store,Park,Food & Drink Shop,Sandwich Place,Dessert Shop,History Museum,Greek Restaurant
2,"Deer Park, Forest Hill SE, Rathnelly, South Hi...",Pub,Coffee Shop,American Restaurant,Sports Bar,Vietnamese Restaurant,Fried Chicken Joint,Light Rail Station,Liquor Store,Pizza Place,Restaurant
3,"Forest Hill North, Forest Hill West",Trail,Jewelry Store,Sushi Restaurant,Park,Yoga Studio,Deli / Bodega,Gym,Greek Restaurant,Gourmet Shop,Garden
4,Lawrence Park,Swim School,Bus Line,Park,Yoga Studio,Deli / Bodega,History Museum,Gym,Greek Restaurant,Gourmet Shop,Garden


### Cluster neighborhoods

In [33]:
# set number of clusters
kclusters = 5

chosen_grouped_clustering = chosen_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(chosen_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 0, 4, 3, 2, 0, 1, 0], dtype=int32)

In [34]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

chosen_merged = chosen_data

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
chosen_merged = chosen_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

chosen_merged.head() # check the last columns!

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,3,Swim School,Bus Line,Park,Yoga Studio,Deli / Bodega,History Museum,Gym,Greek Restaurant,Gourmet Shop,Garden
1,M5N,Central Toronto,Roselawn,43.711695,-79.416936,1,Ice Cream Shop,Garden,Deli / Bodega,Hotel,History Museum,Gym,Greek Restaurant,Gourmet Shop,Furniture / Home Store,Fried Chicken Joint
2,M4P,Central Toronto,Davisville North,43.712751,-79.390197,0,Hotel,Gym,Breakfast Spot,Clothing Store,Park,Food & Drink Shop,Sandwich Place,Dessert Shop,History Museum,Greek Restaurant
3,M5P,Central Toronto,"Forest Hill North, Forest Hill West",43.696948,-79.411307,4,Trail,Jewelry Store,Sushi Restaurant,Park,Yoga Studio,Deli / Bodega,Gym,Greek Restaurant,Gourmet Shop,Garden
4,M4R,Central Toronto,North Toronto West,43.715383,-79.405678,0,Coffee Shop,Sporting Goods Shop,Yoga Studio,Ice Cream Shop,Italian Restaurant,Furniture / Home Store,Diner,Mexican Restaurant,Park,Dessert Shop


In [35]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(chosen_merged['Latitude'], chosen_merged['Longitude'], chosen_merged['Neighborhood'], chosen_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Examine Clusters

In [36]:
# Rows labelled cluster 0, showing the column at position 1 and every column from position 5 on.
in_cluster = chosen_merged['Cluster Labels'] == 0
shown_columns = chosen_merged.columns[[1] + list(range(5, chosen_merged.shape[1]))]
chosen_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Central Toronto,0,Hotel,Gym,Breakfast Spot,Clothing Store,Park,Food & Drink Shop,Sandwich Place,Dessert Shop,History Museum,Greek Restaurant
4,Central Toronto,0,Coffee Shop,Sporting Goods Shop,Yoga Studio,Ice Cream Shop,Italian Restaurant,Furniture / Home Store,Diner,Mexican Restaurant,Park,Dessert Shop
5,Central Toronto,0,Café,Coffee Shop,Sandwich Place,Pizza Place,Indian Restaurant,Pub,BBQ Joint,Burger Joint,Convenience Store,Cosmetics Shop
6,Central Toronto,0,Sandwich Place,Dessert Shop,Coffee Shop,Café,Italian Restaurant,Sushi Restaurant,Pizza Place,Pharmacy,Park,Dance Studio
8,Central Toronto,0,Pub,Coffee Shop,American Restaurant,Sports Bar,Vietnamese Restaurant,Fried Chicken Joint,Light Rail Station,Liquor Store,Pizza Place,Restaurant


In [37]:
# Rows labelled cluster 1, showing the column at position 1 and every column from position 5 on.
in_cluster = chosen_merged['Cluster Labels'] == 1
shown_columns = chosen_merged.columns[[1] + list(range(5, chosen_merged.shape[1]))]
chosen_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Central Toronto,1,Ice Cream Shop,Garden,Deli / Bodega,Hotel,History Museum,Gym,Greek Restaurant,Gourmet Shop,Furniture / Home Store,Fried Chicken Joint


In [38]:
# Rows labelled cluster 2, showing the column at position 1 and every column from position 5 on.
in_cluster = chosen_merged['Cluster Labels'] == 2
shown_columns = chosen_merged.columns[[1] + list(range(5, chosen_merged.shape[1]))]
chosen_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
7,Central Toronto,2,Gym,Summer Camp,Park,Playground,Yoga Studio,Deli / Bodega,Greek Restaurant,Gourmet Shop,Garden,Furniture / Home Store


In [39]:
# Rows labelled cluster 3, showing the column at position 1 and every column from position 5 on.
in_cluster = chosen_merged['Cluster Labels'] == 3
shown_columns = chosen_merged.columns[[1] + list(range(5, chosen_merged.shape[1]))]
chosen_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Central Toronto,3,Swim School,Bus Line,Park,Yoga Studio,Deli / Bodega,History Museum,Gym,Greek Restaurant,Gourmet Shop,Garden


In [40]:
# Rows labelled cluster 4, showing the column at position 1 and every column from position 5 on.
in_cluster = chosen_merged['Cluster Labels'] == 4
shown_columns = chosen_merged.columns[[1] + list(range(5, chosen_merged.shape[1]))]
chosen_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,Central Toronto,4,Trail,Jewelry Store,Sushi Restaurant,Park,Yoga Studio,Deli / Bodega,Gym,Greek Restaurant,Gourmet Shop,Garden
