# Part 1

##  Import librairies

In [4]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
#!pip install folium
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


## scrape the Wikipedia page

In [5]:
# import required modules 
from bs4 import BeautifulSoup 

# send the GET request
page = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text


In [6]:
#print(page)

In [7]:
# scrape webpage 
soup = BeautifulSoup(page, 'html.parser') 
  

In [8]:
my_table = soup.find('table')
# (my_table)


In [9]:
PostalCodes = []
Boroughs = []
Neighborhoods = []

# for each row of the table, find all the table data
for row in soup.find('table').find_all('tr'):
    cells = row.find_all('td')

In [10]:
# append the data 
for row in soup.find('table').find_all('tr'):
    cells = row.find_all('td')
    if(len(cells) > 0):
        PostalCodes.append(cells[0].text)
        Boroughs.append(cells[1].text)
        Neighborhoods.append(cells[2].text.rstrip('\n')) # avoid new lines in neighborhood cell

In [11]:
# create a new DataFrame 
df_toronto = pd.DataFrame({"PostalCode": PostalCodes,
                           "Borough": Boroughs,
                           "Neighborhood": Neighborhoods})

df_toronto = df_toronto.replace('\n',' ', regex=True)
df_toronto.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [12]:
df_toronto.Borough = df_toronto.Borough.str.strip()

df_toronto['Borough'].value_counts()

Not assigned        77
North York          24
Downtown Toronto    19
Scarborough         17
Etobicoke           12
Central Toronto      9
West Toronto         6
East Toronto         5
East York            5
York                 5
Mississauga          1
Name: Borough, dtype: int64

In [13]:
# remove rows with borough that is Not assigned.

df_toronto1 = df_toronto[df_toronto.Borough != "Not assigned"].reset_index(drop=True)
df_toronto1.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [14]:
df_toronto1.shape

(103, 3)

## Group neighborhoods in the same postal code area

In [15]:
df_toronto_grouped = df_toronto1.groupby(["PostalCode", "Borough"], as_index=False).agg(lambda x: ", ".join(x))
df_toronto_grouped.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


## If a cell has a borough but a Not assigned  neighborhood, then the neighborhood will be the same as the borough

In [16]:
# if Neighborhood="Not assigned", put the value the same as Borough
for index, row in df_toronto_grouped.iterrows():
    if row["Neighborhood"] == "Not assigned":
        row["Neighborhood"] = row["Borough"]
        
df_toronto_grouped.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


## Print the number of rows of your dataframe

In [17]:
df_toronto_grouped.shape

(103, 3)

# Part 2

In [18]:
df_Geospatial_Coordinates = pd.read_csv("https://ibm.box.com/shared/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv")
df_Geospatial_Coordinates.head()


Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


## Rename the column of postal code

In [19]:
df_Geospatial_Coordinates.rename(columns={"Postal Code": "PostalCode"}, inplace=True)
df_Geospatial_Coordinates.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [20]:
df_Geospatial_Coordinates.dtypes

PostalCode     object
Latitude      float64
Longitude     float64
dtype: object

In [21]:
## Merge the tables df_Geospatial_Coordinates et df_toronto_grouped

In [22]:

df_Geospatial_Coordinates['PostalCode'] = df_Geospatial_Coordinates['PostalCode'].str.strip()
df_toronto_grouped['PostalCode'] = df_toronto_grouped['PostalCode'].str.strip()

In [23]:

# Merge tables
df_torontoWithCoordinates=pd.merge(df_toronto_grouped,df_Geospatial_Coordinates, on="PostalCode",how="left")

df_torontoWithCoordinates.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


# Part3

## Check the number of neighborhoods and boroughs

In [24]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(df_torontoWithCoordinates['Borough'].unique()),
        df_torontoWithCoordinates.shape[0]
    )
)

The dataframe has 10 boroughs and 103 neighborhoods.


## Use geopy library to get the latitude and longitude values of Toronto

In [25]:
address = 'Toronto, Ontario'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto city {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto city 43.6534817, -79.3839347.


## create map of Toronto using latitude and longitude values

In [26]:
# create map of New York using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df_torontoWithCoordinates['Latitude'], df_torontoWithCoordinates['Longitude'], df_torontoWithCoordinates['Borough'], df_torontoWithCoordinates['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

 # Map and segment and cluster only the neighborhoods in Manhattan.

In [27]:
# Map and segment and cluster only the neighborhoods in WestToronto.

WestToronto_data = df_torontoWithCoordinates[df_torontoWithCoordinates['Borough'] == 'West Toronto'].reset_index(drop=True)
WestToronto_data.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M6H,West Toronto,"Dufferin, Dovercourt Village",43.669005,-79.442259
1,M6J,West Toronto,"Little Portugal, Trinity",43.647927,-79.41975
2,M6K,West Toronto,"Brockton, Parkdale Village, Exhibition Place",43.636847,-79.428191
3,M6P,West Toronto,"High Park, The Junction South",43.661608,-79.464763
4,M6R,West Toronto,"Parkdale, Roncesvalles",43.64896,-79.456325


# Get the geographical coordinates of West Toronto.

In [28]:
address = 'West Toronto, Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
locationW = geolocator.geocode(address)
latitudeW = location.latitude
longitudeW = location.longitude
print('The geograpical coordinate of West Toronto are {}, {}.'.format(latitudeW, longitudeW))

The geograpical coordinate of West Toronto are 43.6534817, -79.3839347.


# visualization of West Toronto the neighborhoods in it

In [29]:
# visualization of West Toronto the neighborhoods in it

map_WestToronto = folium.Map(location=[latitudeW, longitudeW], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(WestToronto_data['Latitude'], WestToronto_data['Longitude'], WestToronto_data['Borough'], WestToronto_data['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_WestToronto)  
    
map_WestToronto


# Using the Foursquare API to explore the neighborhoods and segment them

### Define Foursquare Credentials and Version

In [30]:
CLIENT_ID = 'KF4FH0AUQSFXUFCA5UMOI14KMQILWL0XJL0IBV5OA4PQBS5P' # your Foursquare ID
CLIENT_SECRET = 'HC1SQULWMVG2LWH2RPVAPEOUPHH4P0THTP343XNTUSMY5DQX' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value



### Get the neighborhood's latitude and longitude values.


In [31]:
neighborhood_latitude = WestToronto_data.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = WestToronto_data.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = WestToronto_data.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                              neighborhood_latitude, 
                                                              neighborhood_longitude))

Latitude and longitude values of Dufferin, Dovercourt Village are 43.66900510000001, -79.4422593.


### GET request URL

In [32]:

LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius

url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url # display URL


'https://api.foursquare.com/v2/venues/explore?&client_id=KF4FH0AUQSFXUFCA5UMOI14KMQILWL0XJL0IBV5OA4PQBS5P&client_secret=HC1SQULWMVG2LWH2RPVAPEOUPHH4P0THTP343XNTUSMY5DQX&v=20180605&ll=43.66900510000001,-79.4422593&radius=500&limit=100'

In [34]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '60196f478a030b1f61c25097'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Davenport',
  'headerFullLocation': 'Davenport, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 17,
  'suggestedBounds': {'ne': {'lat': 43.67350510450001,
    'lng': -79.43604977526607},
   'sw': {'lat': 43.664505095500004, 'lng': -79.44846882473394}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '5753753b498eeb535c53aed5',
       'name': 'The Greater Good Bar',
       'location': {'address': '229 Geary St',
        'crossStreet': 'at Dufferin St',
        'lat': 43.669409,
        'lng': -79.439267,
        'labeledLatLngs': [{'label': 'disp

### Get category type function from the Foursquare lab

In [35]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [36]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head(10)

  app.launch_new_instance()


Unnamed: 0,name,categories,lat,lng
0,The Greater Good Bar,Bar,43.669409,-79.439267
1,Parallel,Middle Eastern Restaurant,43.669516,-79.438728
2,Blood Brothers Brewing,Brewery,43.669944,-79.436533
3,FreshCo,Grocery Store,43.667918,-79.440754
4,Happy Bakery & Pastries,Bakery,43.66705,-79.441791
5,Rehearsal Factory,Music Venue,43.668877,-79.443603
6,The Sovereign,Café,43.673116,-79.440265
7,Nova Era Bakery,Bakery,43.669886,-79.437582
8,Food Basics,Supermarket,43.666886,-79.446691
9,TD Canada Trust,Bank,43.667934,-79.441698


### Number of venues were returned by Foursquare

In [37]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

17 venues were returned by Foursquare.


### Function to repeat the same process to all the neighborhoods in Toronto West

In [38]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

### Create a new dataframe called manhattan_venues.

In [39]:
WestToronto_venues = getNearbyVenues(names=WestToronto_data['Neighborhood'],
                                   latitudes=WestToronto_data['Latitude'],
                                   longitudes=WestToronto_data['Longitude']
                                  )

Dufferin, Dovercourt Village
Little Portugal, Trinity
Brockton, Parkdale Village, Exhibition Place
High Park, The Junction South
Parkdale, Roncesvalles
Runnymede, Swansea


In [40]:
print(WestToronto_venues.shape)
WestToronto_venues.head()

(159, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Dufferin, Dovercourt Village",43.669005,-79.442259,The Greater Good Bar,43.669409,-79.439267,Bar
1,"Dufferin, Dovercourt Village",43.669005,-79.442259,Parallel,43.669516,-79.438728,Middle Eastern Restaurant
2,"Dufferin, Dovercourt Village",43.669005,-79.442259,Blood Brothers Brewing,43.669944,-79.436533,Brewery
3,"Dufferin, Dovercourt Village",43.669005,-79.442259,FreshCo,43.667918,-79.440754,Grocery Store
4,"Dufferin, Dovercourt Village",43.669005,-79.442259,Happy Bakery & Pastries,43.66705,-79.441791,Bakery


### Check how many venues were returned for each neighborhood

In [42]:
WestToronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Brockton, Parkdale Village, Exhibition Place",24,24,24,24,24,24
"Dufferin, Dovercourt Village",17,17,17,17,17,17
"High Park, The Junction South",24,24,24,24,24,24
"Little Portugal, Trinity",43,43,43,43,43,43
"Parkdale, Roncesvalles",14,14,14,14,14,14
"Runnymede, Swansea",37,37,37,37,37,37


### Number of unique categories can be curated from all the returned venues

In [44]:
print('There are {} uniques categories.'.format(len(WestToronto_venues['Venue Category'].unique())))

There are 81 uniques categories.


## Analyze Each Neighborhood

In [46]:
# one hot encoding
WestToronto_onehot = pd.get_dummies(WestToronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
WestToronto_onehot['Neighborhood'] = WestToronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [WestToronto_onehot.columns[-1]] + list(WestToronto_onehot.columns[:-1])
WestToronto_onehot = WestToronto_onehot[fixed_columns]

WestToronto_onehot.head()

Unnamed: 0,Neighborhood,Antique Shop,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bakery,Bank,Bar,Beer Store,Bookstore,Boutique,Breakfast Spot,Brewery,Burrito Place,Café,Cajun / Creole Restaurant,Climbing Gym,Cocktail Bar,Coffee Shop,Comic Shop,Convenience Store,Cuban Restaurant,Cupcake Shop,Dessert Shop,Diner,Discount Store,Dog Run,Eastern European Restaurant,Falafel Restaurant,Fast Food Restaurant,Fish & Chips Shop,Flea Market,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Gastropub,Gift Shop,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Health Food Store,Ice Cream Shop,Indie Movie Theater,Intersection,Italian Restaurant,Japanese Restaurant,Korean Restaurant,Latin American Restaurant,Liquor Store,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Movie Theater,Music Venue,New American Restaurant,Nightclub,Park,Performing Arts Venue,Pet Store,Pharmacy,Pizza Place,Pool,Post Office,Pub,Record Shop,Restaurant,Sandwich Place,School,Smoothie Shop,Speakeasy,Stadium,Supermarket,Sushi Restaurant,Thai Restaurant,Theater,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar,Yoga Studio
0,"Dufferin, Dovercourt Village",0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"Dufferin, Dovercourt Village",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,"Dufferin, Dovercourt Village",0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,"Dufferin, Dovercourt Village",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,"Dufferin, Dovercourt Village",0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


### Group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [47]:
WestToronto_grouped = WestToronto_onehot.groupby('Neighborhood').mean().reset_index()
WestToronto_grouped

Unnamed: 0,Neighborhood,Antique Shop,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bakery,Bank,Bar,Beer Store,Bookstore,Boutique,Breakfast Spot,Brewery,Burrito Place,Café,Cajun / Creole Restaurant,Climbing Gym,Cocktail Bar,Coffee Shop,Comic Shop,Convenience Store,Cuban Restaurant,Cupcake Shop,Dessert Shop,Diner,Discount Store,Dog Run,Eastern European Restaurant,Falafel Restaurant,Fast Food Restaurant,Fish & Chips Shop,Flea Market,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Gastropub,Gift Shop,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Health Food Store,Ice Cream Shop,Indie Movie Theater,Intersection,Italian Restaurant,Japanese Restaurant,Korean Restaurant,Latin American Restaurant,Liquor Store,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Movie Theater,Music Venue,New American Restaurant,Nightclub,Park,Performing Arts Venue,Pet Store,Pharmacy,Pizza Place,Pool,Post Office,Pub,Record Shop,Restaurant,Sandwich Place,School,Smoothie Shop,Speakeasy,Stadium,Supermarket,Sushi Restaurant,Thai Restaurant,Theater,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar,Yoga Studio
0,"Brockton, Parkdale Village, Exhibition Place",0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.041667,0.0,0.0,0.0,0.083333,0.0,0.041667,0.125,0.0,0.041667,0.0,0.083333,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.041667,0.041667,0.0,0.0,0.0,0.041667,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.041667,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Dufferin, Dovercourt Village",0.0,0.0,0.0,0.0,0.058824,0.117647,0.058824,0.058824,0.0,0.0,0.0,0.0,0.058824,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.058824,0.0,0.0,0.058824,0.0,0.0,0.058824,0.0,0.058824,0.117647,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"High Park, The Junction South",0.041667,0.0,0.041667,0.0,0.0,0.041667,0.0,0.041667,0.0,0.041667,0.0,0.0,0.0,0.0,0.083333,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.041667,0.0,0.0,0.0,0.041667,0.0,0.041667,0.0,0.041667,0.041667,0.041667,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.041667,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0
3,"Little Portugal, Trinity",0.0,0.023256,0.0,0.046512,0.0,0.046512,0.0,0.116279,0.023256,0.0,0.023256,0.0,0.023256,0.0,0.046512,0.0,0.0,0.023256,0.046512,0.0,0.0,0.023256,0.023256,0.0,0.023256,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.023256,0.0,0.0,0.0,0.023256,0.0,0.023256,0.0,0.0,0.0,0.023256,0.0,0.0,0.023256,0.023256,0.023256,0.0,0.0,0.046512,0.0,0.0,0.023256,0.0,0.0,0.023256,0.0,0.023256,0.0,0.0,0.0,0.023256,0.0,0.0,0.0,0.023256,0.046512,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.023256,0.046512,0.023256,0.023256,0.023256
4,"Parkdale, Roncesvalles",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.071429,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.071429,0.0,0.071429,0.0,0.0,0.071429,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"Runnymede, Swansea",0.0,0.0,0.0,0.0,0.0,0.0,0.027027,0.027027,0.0,0.027027,0.0,0.0,0.0,0.027027,0.081081,0.0,0.0,0.0,0.081081,0.027027,0.0,0.0,0.0,0.027027,0.027027,0.0,0.0,0.0,0.027027,0.0,0.027027,0.0,0.027027,0.0,0.0,0.0,0.0,0.027027,0.0,0.0,0.0,0.027027,0.0,0.027027,0.0,0.054054,0.0,0.0,0.027027,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027027,0.0,0.0,0.0,0.054054,0.0,0.027027,0.054054,0.0,0.054054,0.027027,0.027027,0.027027,0.0,0.0,0.0,0.054054,0.0,0.0,0.027027,0.0,0.0,0.027027


In [48]:
WestToronto_grouped.shape

(6, 82)

### Print each neighborhood along with the top 5 most common venues

In [49]:
num_top_venues = 5

for hood in WestToronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = WestToronto_grouped[WestToronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Brockton, Parkdale Village, Exhibition Place----
            venue  freq
0            Café  0.12
1          Bakery  0.08
2       Nightclub  0.08
3  Breakfast Spot  0.08
4     Coffee Shop  0.08


----Dufferin, Dovercourt Village----
          venue  freq
0        Bakery  0.12
1      Pharmacy  0.12
2   Supermarket  0.06
3  Liquor Store  0.06
4          Pool  0.06


----High Park, The Junction South----
                venue  freq
0  Mexican Restaurant  0.08
1     Thai Restaurant  0.08
2                Café  0.08
3        Antique Shop  0.04
4           Speakeasy  0.04


----Little Portugal, Trinity----
                           venue  freq
0                            Bar  0.12
1                    Men's Store  0.05
2  Vegetarian / Vegan Restaurant  0.05
3                    Coffee Shop  0.05
4                           Café  0.05


----Parkdale, Roncesvalles----
                venue  freq
0      Breakfast Spot  0.14
1           Gift Shop  0.14
2       Movie Theater  0.07
3        D

### Function to sort the venues in descending order

In [50]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

### Create the new dataframe and display the top 10 venues for each neighborhood

In [52]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = WestToronto_grouped['Neighborhood']

for ind in np.arange(WestToronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(WestToronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Brockton, Parkdale Village, Exhibition Place",Café,Breakfast Spot,Bakery,Coffee Shop,Nightclub,Gym,Grocery Store,Intersection,Italian Restaurant,Convenience Store
1,"Dufferin, Dovercourt Village",Pharmacy,Bakery,Park,Middle Eastern Restaurant,Music Venue,Liquor Store,Pet Store,Pool,Café,Brewery
2,"High Park, The Junction South",Café,Thai Restaurant,Mexican Restaurant,Antique Shop,Furniture / Home Store,Grocery Store,Fried Chicken Joint,Flea Market,Fast Food Restaurant,Italian Restaurant
3,"Little Portugal, Trinity",Bar,Asian Restaurant,Coffee Shop,Men's Store,Restaurant,Bakery,Café,Vegetarian / Vegan Restaurant,Diner,Ice Cream Shop
4,"Parkdale, Roncesvalles",Breakfast Spot,Gift Shop,Bookstore,Dessert Shop,Movie Theater,Coffee Shop,Dog Run,Eastern European Restaurant,Restaurant,Cuban Restaurant


### Cluster Neighborhoods

In [54]:
# set number of clusters
kclusters = 5

WestToronto_grouped_clustering = WestToronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(WestToronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([4, 1, 3, 2, 0, 2], dtype=int32)

### Create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

In [58]:
# add clustering labels
# neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

WestToronto_merged = WestToronto_data



In [59]:
# merge manhattan_grouped with manhattan_data to add latitude/longitude for each neighborhood
WestToronto_merged = WestToronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

WestToronto_merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M6H,West Toronto,"Dufferin, Dovercourt Village",43.669005,-79.442259,1,Pharmacy,Bakery,Park,Middle Eastern Restaurant,Music Venue,Liquor Store,Pet Store,Pool,Café,Brewery
1,M6J,West Toronto,"Little Portugal, Trinity",43.647927,-79.41975,2,Bar,Asian Restaurant,Coffee Shop,Men's Store,Restaurant,Bakery,Café,Vegetarian / Vegan Restaurant,Diner,Ice Cream Shop
2,M6K,West Toronto,"Brockton, Parkdale Village, Exhibition Place",43.636847,-79.428191,4,Café,Breakfast Spot,Bakery,Coffee Shop,Nightclub,Gym,Grocery Store,Intersection,Italian Restaurant,Convenience Store
3,M6P,West Toronto,"High Park, The Junction South",43.661608,-79.464763,3,Café,Thai Restaurant,Mexican Restaurant,Antique Shop,Furniture / Home Store,Grocery Store,Fried Chicken Joint,Flea Market,Fast Food Restaurant,Italian Restaurant
4,M6R,West Toronto,"Parkdale, Roncesvalles",43.64896,-79.456325,0,Breakfast Spot,Gift Shop,Bookstore,Dessert Shop,Movie Theater,Coffee Shop,Dog Run,Eastern European Restaurant,Restaurant,Cuban Restaurant


## Visualize the resulting clusters

In [61]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(WestToronto_merged['Latitude'], WestToronto_merged['Longitude'], WestToronto_merged['Neighborhood'], WestToronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [62]:
## Examine Clusters

In [63]:

WestToronto_merged.loc[WestToronto_merged['Cluster Labels'] == 0, WestToronto_merged.columns[[1] + list(range(5, WestToronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,West Toronto,0,Breakfast Spot,Gift Shop,Bookstore,Dessert Shop,Movie Theater,Coffee Shop,Dog Run,Eastern European Restaurant,Restaurant,Cuban Restaurant


In [65]:
WestToronto_merged.loc[WestToronto_merged['Cluster Labels'] == 1, WestToronto_merged.columns[[1] + list(range(5, WestToronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,West Toronto,0,Breakfast Spot,Gift Shop,Bookstore,Dessert Shop,Movie Theater,Coffee Shop,Dog Run,Eastern European Restaurant,Restaurant,Cuban Restaurant


In [68]:
WestToronto_merged.loc[WestToronto_merged['Cluster Labels'] == 2, WestToronto_merged.columns[[1] + list(range(5, WestToronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,West Toronto,2,Bar,Asian Restaurant,Coffee Shop,Men's Store,Restaurant,Bakery,Café,Vegetarian / Vegan Restaurant,Diner,Ice Cream Shop
5,West Toronto,2,Café,Coffee Shop,Pizza Place,Italian Restaurant,Pub,Sushi Restaurant,Restaurant,French Restaurant,Comic Shop,Latin American Restaurant


In [67]:
WestToronto_merged.loc[WestToronto_merged['Cluster Labels'] == 3, WestToronto_merged.columns[[1] + list(range(5, WestToronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,West Toronto,3,Café,Thai Restaurant,Mexican Restaurant,Antique Shop,Furniture / Home Store,Grocery Store,Fried Chicken Joint,Flea Market,Fast Food Restaurant,Italian Restaurant


In [66]:
WestToronto_merged.loc[WestToronto_merged['Cluster Labels'] == 4, WestToronto_merged.columns[[1] + list(range(5, WestToronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,West Toronto,4,Café,Breakfast Spot,Bakery,Coffee Shop,Nightclub,Gym,Grocery Store,Intersection,Italian Restaurant,Convenience Store
