# Segmenting and Clustering Neighborhoods in Toronto

## Task 1

In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np

In [2]:
#scrape data from wiki
res = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')
string = res.text
soup = BeautifulSoup(string, "html.parser")

In [3]:
tabledata = soup.table.get_text('|',strip=True)

In [4]:
datalist = tabledata.split('|')

In [5]:
#split the list into three columns
list1 = datalist[::3]
list2 = datalist[1::3]
list3 = datalist[2::3]

In [6]:
#convert list to dataframe
dict = {list1[0]:list1[1:],list2[0]:list2[1:],list3[0]:list3[1:]}
df = pd.DataFrame(dict)

In [7]:
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [8]:
#change the head name
df.rename(columns={'Postal Code':'PostalCode'}, inplace=True)
df.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [9]:
#ignore cells with a 'Borough' that is 'Not assigned'
rows=[i for i in range(0,df.shape[0]) if df.loc[i,'Borough']=='Not assigned']
df = df.drop(rows)
df.reset_index(drop=True,inplace=True)
df.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [10]:
#there is no row whose 'Nrighbourhood' is 'Not assigned', skip this step
#df.loc[df['Borough']=='Not assigned']

In [11]:
df.head(12)

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


In [12]:
df.shape

(103, 3)

## Task 2

In [13]:
df2 = pd.read_csv('Geospatial_Coordinates.csv')
df2.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [14]:
#rename the head and merge them
df2.rename(columns={'Postal Code':'PostalCode'}, inplace=True)
dfgeo = pd.merge(df,df2,on='PostalCode')

In [15]:
#show the result
dfgeo.head(12)

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


In [16]:
dfgeo.shape

(103, 5)

## Task 3

### Goal: Explore and cluster the neighborhoods in Toronto.

Select all rows with borough has 'Toronto'

Import libraries we need

In [18]:
import json # library to handle JSON files
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
import matplotlib.cm as cm # Matplotlib and associated plotting modules
import matplotlib.colors as colors
from sklearn.cluster import KMeans # import k-means from clustering stage
import folium # map rendering library

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

Use geopy library to get the latitude and longitude values of Toronto

In [19]:
address = 'Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


Create a map of Toronto with neighborhoods superimposed on top

In [20]:
# create map of Toronto using latitude and longitude values
map_tordf = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(tordf['Latitude'], tordf['Longitude'], tordf['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_tordf)  
    
map_tordf

Define Foursquare credentials

In [21]:
CLIENT_ID = 'MBQTTEVSKWI4CEXPFYIJO4LPDUAT4QNFIUO05IJQKRM04CHI' # your Foursquare ID
CLIENT_SECRET = 'JQVPHUXUJTUHUPDZUL2XCTNNWIOQMSVPIL034WLCHW2PGG1Q' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

Def the function to repeat the same process to all the neighborhoods in YTO

In [22]:
#this function is created by IBM course
def getNearbyVenues(names, latitudes, longitudes, radius=500, LIMIT=100):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

write the code to run the above function on each neighborhood and create a new dataframe

In [23]:
tor_venues = getNearbyVenues(names=tordf['Neighbourhood'],
                                   latitudes=tordf['Latitude'],
                                   longitudes=tordf['Longitude']
                                  )

check the size of the resulting dataframe

In [24]:
print(tor_venues.shape)
tor_venues.head()

(1648, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Regent Park, Harbourfront",43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,"Regent Park, Harbourfront",43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,"Regent Park, Harbourfront",43.65426,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
3,"Regent Park, Harbourfront",43.65426,-79.360636,Impact Kitchen,43.656369,-79.35698,Restaurant
4,"Regent Park, Harbourfront",43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa


Analyze Each Neighborhood

In [25]:
# one hot encoding
tor_onehot = pd.get_dummies(tor_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
tor_onehot['Neighborhood'] = tor_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [tor_onehot.columns[-1]] + list(tor_onehot.columns[:-1])
tor_onehot = tor_onehot[fixed_columns]

tor_onehot.head()

Unnamed: 0,Yoga Studio,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [26]:
tor_grouped = tor_onehot.groupby('Neighborhood').mean().reset_index()
tor_grouped.head()

Unnamed: 0,Neighborhood,Yoga Studio,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.0
1,"Brockton, Parkdale Village, Exhibition Place",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Business reply mail Processing Centre, South C...",0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"CN Tower, King and Spadina, Railway Lands, Har...",0.0,0.0,0.066667,0.066667,0.066667,0.133333,0.133333,0.066667,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.014706,0.0,0.0


Def a function to sort the venues in descending order, create the new dataframe and display the top 10 venues for each neighborhood

In [27]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [28]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = tor_grouped['Neighborhood']

for ind in np.arange(tor_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(tor_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Coffee Shop,Seafood Restaurant,Farmers Market,Beer Bar,Bakery,Cocktail Bar,Restaurant,Café,Cheese Shop,Eastern European Restaurant
1,"Brockton, Parkdale Village, Exhibition Place",Café,Breakfast Spot,Coffee Shop,Grocery Store,Bar,Intersection,Bakery,Italian Restaurant,Burrito Place,Restaurant
2,"Business reply mail Processing Centre, South C...",Light Rail Station,Yoga Studio,Auto Workshop,Comic Shop,Pizza Place,Recording Studio,Burrito Place,Restaurant,Brewery,Skate Park
3,"CN Tower, King and Spadina, Railway Lands, Har...",Airport Lounge,Airport Service,Coffee Shop,Boat or Ferry,Sculpture Garden,Rental Car Location,Plane,Bar,Harbor / Marina,Airport Terminal
4,Central Bay Street,Coffee Shop,Sandwich Place,Café,Italian Restaurant,Japanese Restaurant,Burger Joint,Juice Bar,Salad Place,Bubble Tea Shop,Department Store


Cluster Neighborhoods

In [29]:
# set number of clusters
kclusters = 4

tor_grouped_clustering = tor_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(tor_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 3, 0, 1, 0,
       0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [30]:
tordf.rename(columns={'Neighbourhood':'Neighborhood'}, inplace=True)

# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

tor_merged = tordf

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
tor_merged = tor_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

tor_merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,0,Coffee Shop,Bakery,Café,Park,Breakfast Spot,Pub,Theater,Yoga Studio,Mexican Restaurant,Shoe Store
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,0,Coffee Shop,Yoga Studio,Hobby Shop,Distribution Center,Diner,Italian Restaurant,Beer Bar,Smoothie Shop,Café,Bar
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,0,Clothing Store,Coffee Shop,Café,Cosmetics Shop,Japanese Restaurant,Bubble Tea Shop,Middle Eastern Restaurant,Diner,Ramen Restaurant,Electronics Store
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,0,Coffee Shop,Café,Restaurant,Cocktail Bar,Beer Bar,American Restaurant,Gastropub,Park,Farmers Market,Hotel
4,M4E,East Toronto,The Beaches,43.676357,-79.293031,0,Asian Restaurant,Trail,Pub,Health Food Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Electronics Store,Department Store,Doner Restaurant


visualize the resulting clusters

In [31]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(tor_merged['Latitude'], tor_merged['Longitude'], tor_merged['Neighborhood'], tor_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

show clusters groups

In [32]:
tor_merged.loc[tor_merged['Cluster Labels'] == 0, tor_merged.columns[[1] + list(range(5, tor_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,0,Coffee Shop,Bakery,Café,Park,Breakfast Spot,Pub,Theater,Yoga Studio,Mexican Restaurant,Shoe Store
1,Downtown Toronto,0,Coffee Shop,Yoga Studio,Hobby Shop,Distribution Center,Diner,Italian Restaurant,Beer Bar,Smoothie Shop,Café,Bar
2,Downtown Toronto,0,Clothing Store,Coffee Shop,Café,Cosmetics Shop,Japanese Restaurant,Bubble Tea Shop,Middle Eastern Restaurant,Diner,Ramen Restaurant,Electronics Store
3,Downtown Toronto,0,Coffee Shop,Café,Restaurant,Cocktail Bar,Beer Bar,American Restaurant,Gastropub,Park,Farmers Market,Hotel
4,East Toronto,0,Asian Restaurant,Trail,Pub,Health Food Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Electronics Store,Department Store,Doner Restaurant
5,Downtown Toronto,0,Coffee Shop,Seafood Restaurant,Farmers Market,Beer Bar,Bakery,Cocktail Bar,Restaurant,Café,Cheese Shop,Eastern European Restaurant
6,Downtown Toronto,0,Coffee Shop,Sandwich Place,Café,Italian Restaurant,Japanese Restaurant,Burger Joint,Juice Bar,Salad Place,Bubble Tea Shop,Department Store
7,Downtown Toronto,0,Grocery Store,Café,Park,Candy Store,Diner,Italian Restaurant,Restaurant,Baby Store,Athletics & Sports,Coffee Shop
8,Downtown Toronto,0,Coffee Shop,Café,Restaurant,Clothing Store,Gym,Hotel,Thai Restaurant,Bar,Lounge,Concert Hall
9,West Toronto,0,Pharmacy,Bakery,Grocery Store,Middle Eastern Restaurant,Supermarket,Bank,Bar,Pizza Place,Music Venue,Coffee Shop


In [33]:
tor_merged.loc[tor_merged['Cluster Labels'] == 1, tor_merged.columns[[1] + list(range(5, tor_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
29,Central Toronto,1,Park,Trail,Tennis Court,Restaurant,Deli / Bodega,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run
33,Downtown Toronto,1,Park,Playground,Trail,Women's Store,Dog Run,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center


In [34]:
tor_merged.loc[tor_merged['Cluster Labels'] == 2, tor_merged.columns[[1] + list(range(5, tor_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
19,Central Toronto,2,Pool,Garden,Women's Store,Deli / Bodega,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run


In [35]:
tor_merged.loc[tor_merged['Cluster Labels'] == 3, tor_merged.columns[[1] + list(range(5, tor_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
18,Central Toronto,3,Bus Line,Park,Swim School,Department Store,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run
21,Central Toronto,3,Park,Trail,Jewelry Store,Sushi Restaurant,Bus Line,Comic Shop,Dessert Shop,Electronics Store,Eastern European Restaurant,Dumpling Restaurant


# Task 5

In [36]:
tor_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Regent Park, Harbourfront",43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,"Regent Park, Harbourfront",43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,"Regent Park, Harbourfront",43.65426,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
3,"Regent Park, Harbourfront",43.65426,-79.360636,Impact Kitchen,43.656369,-79.35698,Restaurant
4,"Regent Park, Harbourfront",43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa


In [37]:
# get the categories of all venues
category=list(set(tor_venues['Venue Category']))

In [38]:
category

['Clothing Store',
 'Cocktail Bar',
 'Thai Restaurant',
 'Brazilian Restaurant',
 'Food Truck',
 'Stadium',
 'Event Space',
 'Movie Theater',
 'Diner',
 'Boutique',
 'Tanning Salon',
 'Airport Food Court',
 'Persian Restaurant',
 'Airport Terminal',
 'Comic Shop',
 'Distribution Center',
 'Sporting Goods Shop',
 'Farmers Market',
 'Swim School',
 'Historic Site',
 'Building',
 'Skating Rink',
 'Smoke Shop',
 'Indian Restaurant',
 'Breakfast Spot',
 'Smoothie Shop',
 'Coworking Space',
 'Opera House',
 'Afghan Restaurant',
 'Café',
 'Beer Bar',
 'Nightclub',
 'German Restaurant',
 'Noodle House',
 'French Restaurant',
 'Cajun / Creole Restaurant',
 'Gym',
 'History Museum',
 'Fried Chicken Joint',
 'Beer Store',
 'Sports Bar',
 'Sandwich Place',
 'Airport Service',
 'Train Station',
 'College Arts Building',
 'Market',
 'Bistro',
 'Soup Place',
 'Boat or Ferry',
 'Belgian Restaurant',
 'Aquarium',
 'Shopping Mall',
 'Tea Room',
 'Restaurant',
 'IT Services',
 'Steakhouse',
 'Fruit & Veg

In [39]:
# get all category related to restaurant
res_catg=list()
for i in category:
    if 'Restaurant' in i:
        res_catg.append(i)
print(res_catg)

['Thai Restaurant', 'Brazilian Restaurant', 'Persian Restaurant', 'Indian Restaurant', 'Afghan Restaurant', 'German Restaurant', 'French Restaurant', 'Cajun / Creole Restaurant', 'Belgian Restaurant', 'Restaurant', 'Mediterranean Restaurant', 'Modern European Restaurant', 'Vegetarian / Vegan Restaurant', 'Italian Restaurant', 'Seafood Restaurant', 'Fast Food Restaurant', 'Gluten-free Restaurant', 'Asian Restaurant', 'Doner Restaurant', 'Chinese Restaurant', 'Dumpling Restaurant', 'Theme Restaurant', 'Korean Restaurant', 'Dim Sum Restaurant', 'Colombian Restaurant', 'Moroccan Restaurant', 'Japanese Restaurant', 'Portuguese Restaurant', 'Greek Restaurant', 'Ethiopian Restaurant', 'New American Restaurant', 'Middle Eastern Restaurant', 'Filipino Restaurant', 'Comfort Food Restaurant', 'Vietnamese Restaurant', 'Taiwanese Restaurant', 'Latin American Restaurant', 'Ramen Restaurant', 'Eastern European Restaurant', 'Molecular Gastronomy Restaurant', 'Sushi Restaurant', 'Cuban Restaurant', 'Me

In [40]:
# select restaurants data from venues data
tor_res= tor_venues[tor_venues['Venue Category'].isin(res_catg)]
tor_res.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
3,"Regent Park, Harbourfront",43.65426,-79.360636,Impact Kitchen,43.656369,-79.35698,Restaurant
21,"Regent Park, Harbourfront",43.65426,-79.360636,El Catrin,43.650601,-79.35892,Mexican Restaurant
22,"Regent Park, Harbourfront",43.65426,-79.360636,Cluny Bistro & Boulangerie,43.650565,-79.357843,French Restaurant
47,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,Nando's,43.661728,-79.386391,Portuguese Restaurant
48,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,Mercatto,43.660391,-79.387664,Italian Restaurant


In [41]:
# create map of all resaurants
map_allres = folium.Map(location=[latitude, longitude], zoom_start=12)

# add markers to the map
for i in list(tor_res['Venue']):
    label = folium.Popup(i, parse_html=True)
    folium.Marker(
        location = 
        [list(tor_res[tor_res['Venue']==i]['Venue Latitude'])[0], 
         list(tor_res[tor_res['Venue']==i]['Venue Longitude'])[0]],
        popup=label,
        ).add_to(map_allres)
       
map_allres 

In [42]:
# statistic analysis
count=tor_res['Neighborhood'].value_counts()
print(count)

First Canadian Place, Underground city                                                  30
Commerce Court, Victoria Hotel                                                          28
Toronto Dominion Centre, Design Exchange                                                26
Church and Wellesley                                                                    25
St. James Town                                                                          25
Richmond, Adelaide, King                                                                24
Stn A PO Boxes                                                                          23
Kensington Market, Chinatown, Grange Park                                               21
Garden District, Ryerson                                                                20
Central Bay Street                                                                      19
The Danforth West, Riverdale                                                            16

In [43]:
# create bubble map
map_res = folium.Map(location=[latitude, longitude], zoom_start=12)

# add markers to the map
for i in list(count.index):
    label = folium.Popup(i + ' res: ' + str(count[i]), parse_html=True)
    folium.CircleMarker(
        location = 
        [list(tor_res[tor_res['Neighborhood']==i]['Neighborhood Latitude'])[0], 
         list(tor_res[tor_res['Neighborhood']==i]['Neighborhood Longitude'])[0]],
        radius=count[i]/1,
        popup=label,
        color='pink',
        fill=True,
        fill_color='red',
        fill_opacity=count[i]/40
        ).add_to(map_res)
       
map_res 

Our choice:  
First Canadian Place, Underground city  
Commerce Court, Victoria Hotel  
Toronto Dominion Centre, Design Exchange  

In [44]:
tor_res['Venue Category'].value_counts()

Restaurant                         53
Italian Restaurant                 36
Japanese Restaurant                32
Sushi Restaurant                   25
Seafood Restaurant                 22
American Restaurant                21
Vegetarian / Vegan Restaurant      17
Thai Restaurant                    17
Asian Restaurant                   13
Greek Restaurant                   12
Mexican Restaurant                 11
Fast Food Restaurant               11
Chinese Restaurant                 10
French Restaurant                  10
New American Restaurant             9
Vietnamese Restaurant               8
Middle Eastern Restaurant           7
Indian Restaurant                   7
Latin American Restaurant           6
Comfort Food Restaurant             6
Ramen Restaurant                    5
Caribbean Restaurant                5
Mediterranean Restaurant            4
Moroccan Restaurant                 4
Gluten-free Restaurant              4
Eastern European Restaurant         3
Modern Europ

In [45]:
res1=tor_res[tor_res['Neighborhood']=='First Canadian Place, Underground city']
res1['Venue Category'].value_counts()

Restaurant                       4
Japanese Restaurant              4
American Restaurant              3
Seafood Restaurant               3
Asian Restaurant                 3
Sushi Restaurant                 2
Mediterranean Restaurant         1
Brazilian Restaurant             1
Vegetarian / Vegan Restaurant    1
Latin American Restaurant        1
Fast Food Restaurant             1
Colombian Restaurant             1
Gluten-free Restaurant           1
Italian Restaurant               1
Greek Restaurant                 1
Thai Restaurant                  1
New American Restaurant          1
Name: Venue Category, dtype: int64

In [46]:
res2=tor_res[tor_res['Neighborhood']=='Commerce Court, Victoria Hotel']
res2['Venue Category'].value_counts()

Restaurant                       7
American Restaurant              4
Japanese Restaurant              3
Seafood Restaurant               3
Vegetarian / Vegan Restaurant    2
Italian Restaurant               2
Asian Restaurant                 2
New American Restaurant          1
Gluten-free Restaurant           1
Latin American Restaurant        1
French Restaurant                1
Thai Restaurant                  1
Name: Venue Category, dtype: int64

In [47]:
res3=tor_res[tor_res['Neighborhood']=='Toronto Dominion Centre, Design Exchange']
res3['Venue Category'].value_counts()

Restaurant                       4
Japanese Restaurant              3
American Restaurant              3
Seafood Restaurant               3
Italian Restaurant               3
Asian Restaurant                 2
Greek Restaurant                 1
French Restaurant                1
Sushi Restaurant                 1
New American Restaurant          1
Gluten-free Restaurant           1
Fast Food Restaurant             1
Vegetarian / Vegan Restaurant    1
Chinese Restaurant               1
Name: Venue Category, dtype: int64

In [48]:
top10=list(tor_res['Venue Category'].value_counts().index)[:10]
print(top10)

['Restaurant', 'Italian Restaurant', 'Japanese Restaurant', 'Sushi Restaurant', 'Seafood Restaurant', 'American Restaurant', 'Vegetarian / Vegan Restaurant', 'Thai Restaurant', 'Asian Restaurant', 'Greek Restaurant']


In [49]:
print('First Canadian Place, Underground city is suitable for:')
for i in top10:
    if i not in list(res1['Venue Category'].value_counts().index):
        print(i)

First Canadian Place, Underground city is suitable for:


In [50]:
print('Commerce Court, Victoria Hotel is suitable for:')
for i in top10:
    if i not in list(res2['Venue Category'].value_counts().index):
        print(i)

Commerce Court, Victoria Hotel is suitable for:
Sushi Restaurant
Greek Restaurant


In [51]:
print('Toronto Dominion Centre, Design Exchange is suitable for:')
for i in top10:
    if i not in list(res3['Venue Category'].value_counts().index):
        print(i)

Toronto Dominion Centre, Design Exchange is suitable for:
Thai Restaurant
