# Capstone Project Notebook

### Importing required libraries

In [2]:
import numpy as np
import pandas as pd
import json
import requests
import urllib.request
from bs4 import BeautifulSoup
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium
!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim

Collecting package metadata (repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.



### Map of Toronto, Canada

In [24]:
address = 'Toronto, Canada'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))
toronto_map = folium.Map(location=[latitude, longitude], zoom_start=11, tiles='Stamen Terrain')

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [25]:
toronto_map

### Scraping data from wikipedia

In [373]:
url="https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
page=urllib.request.urlopen(url)
soup=BeautifulSoup(page,"lxml")
data=soup.find('table',class_="wikitable sortable")
df=pd.read_html(str(data))
df = pd.read_json(df[0].to_json(orient='records'))
df=pd.DataFrame(df)
header=df.iloc[0]
df=df[1:]
df.columns=header
df

Unnamed: 0,Postal Code,Borough,Neighborhood
1,M1A,Not assigned,Not assigned
2,M2A,Not assigned,Not assigned
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,"Regent Park, Harbourfront"
6,M6A,North York,"Lawrence Manor, Lawrence Heights"
7,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
8,M8A,Not assigned,Not assigned
9,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
10,M1B,Scarborough,"Malvern, Rouge"


### Removing rows from data which have Borough equals to "Not assigned"

In [374]:
df=df[df['Borough']!="Not assigned"]
df.shape

(103, 3)

### Merging different neighbourhoods of a Borough into one

In [375]:
raw_df=df.groupby(['Borough','Postal Code'],as_index=False).agg(','.join)
raw_df

Unnamed: 0,Borough,Postal Code,Neighborhood
0,Central Toronto,M4N,Lawrence Park
1,Central Toronto,M4P,Davisville North
2,Central Toronto,M4R,"North Toronto West, Lawrence Park"
3,Central Toronto,M4S,Davisville
4,Central Toronto,M4T,"Moore Park, Summerhill East"
5,Central Toronto,M4V,"Summerhill West, Rathnelly, South Hill, Forest..."
6,Central Toronto,M5N,Roselawn
7,Central Toronto,M5P,"Forest Hill North & West, Forest Hill Road Park"
8,Central Toronto,M5R,"The Annex, North Midtown, Yorkville"
9,Downtown Toronto,M4W,Rosedale


### Dealing with "Not assigned" value in Neighbourhood column

In [376]:
for x in range(raw_df.shape[0]):
    row=raw_df.iloc[x]
    if(row['Neighborhood']=="Not assigned"):
        row['Neighborhood']=row['Borough']

In [377]:
for x in range(raw_df.shape[0]):
    row=raw_df.iloc[x]
    if(row['Neighborhood']=="Not assigned"):
        print("Error")

In [378]:
raw_df.shape

(103, 3)

### I'm using spreadsheet provided in the instructions of week 3 to get the coordinates of PostalCodes

In [379]:
geo_cord=pd.read_csv('http://cocl.us/Geospatial_data')
geo_cord

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


In [380]:
merged_df=pd.merge(raw_df,geo_cord,on="Postal Code")
merged_df

Unnamed: 0,Borough,Postal Code,Neighborhood,Latitude,Longitude
0,Central Toronto,M4N,Lawrence Park,43.728020,-79.388790
1,Central Toronto,M4P,Davisville North,43.712751,-79.390197
2,Central Toronto,M4R,"North Toronto West, Lawrence Park",43.715383,-79.405678
3,Central Toronto,M4S,Davisville,43.704324,-79.388790
4,Central Toronto,M4T,"Moore Park, Summerhill East",43.689574,-79.383160
5,Central Toronto,M4V,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049
6,Central Toronto,M5N,Roselawn,43.711695,-79.416936
7,Central Toronto,M5P,"Forest Hill North & West, Forest Hill Road Park",43.696948,-79.411307
8,Central Toronto,M5R,"The Annex, North Midtown, Yorkville",43.672710,-79.405678
9,Downtown Toronto,M4W,Rosedale,43.679563,-79.377529


### Explore and cluster the neighborhoods with Boroughs

In [381]:
merged_df['Borough'].unique()

array(['Central Toronto', 'Downtown Toronto', 'East Toronto', 'East York',
       'Etobicoke', 'Mississauga', 'North York', 'Scarborough',
       'West Toronto', 'York'], dtype=object)

In [382]:
toronto_data=merged_df

In [383]:
toronto_data.head()

Unnamed: 0,Borough,Postal Code,Neighborhood,Latitude,Longitude
0,Central Toronto,M4N,Lawrence Park,43.72802,-79.38879
1,Central Toronto,M4P,Davisville North,43.712751,-79.390197
2,Central Toronto,M4R,"North Toronto West, Lawrence Park",43.715383,-79.405678
3,Central Toronto,M4S,Davisville,43.704324,-79.38879
4,Central Toronto,M4T,"Moore Park, Summerhill East",43.689574,-79.38316


In [384]:
CLIENT_ID = 'JOSMF3XVVONCAY0K52FIUP0XQFUORNHGR3IKD1DUVG2BHLYX' # your Foursquare ID
CLIENT_SECRET = 'CPH35Z4YUT1HX3PF1CDQBXEJ5XGSOKB3SI4PCMI1BOR3ID1N' # your Foursquare Secret
VERSION = '20180605'

In [385]:
LIMIT=100
radius=500
url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format('50e6da19e4b0d8a78a0e9794',CLIENT_ID,CLIENT_SECRET,VERSION)
url

'https://api.foursquare.com/v2/venues/50e6da19e4b0d8a78a0e9794?client_id=JOSMF3XVVONCAY0K52FIUP0XQFUORNHGR3IKD1DUVG2BHLYX&client_secret=CPH35Z4YUT1HX3PF1CDQBXEJ5XGSOKB3SI4PCMI1BOR3ID1N&v=20180605'

In [386]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5ef10877882fc7001bf1888a'},
 'response': {'venue': {'id': '50e6da19e4b0d8a78a0e9794',
   'name': 'Lawrence Park Ravine',
   'contact': {'twitter': 'bobrodkin'},
   'location': {'address': '3055 Yonge Street',
    'crossStreet': 'Lawrence Avenue East',
    'lat': 43.72696303913755,
    'lng': -79.39438246708775,
    'labeledLatLngs': [{'label': 'display',
      'lat': 43.72696303913755,
      'lng': -79.39438246708775}],
    'cc': 'CA',
    'city': 'Toronto',
    'state': 'ON',
    'country': 'Canada',
    'formattedAddress': ['3055 Yonge Street (Lawrence Avenue East)',
     'Toronto ON',
     'Canada']},
   'canonicalUrl': 'https://foursquare.com/v/lawrence-park-ravine/50e6da19e4b0d8a78a0e9794',
   'categories': [{'id': '4bf58dd8d48988d163941735',
     'name': 'Park',
     'pluralName': 'Parks',
     'shortName': 'Park',
     'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/parks_outdoors/park_',
      'suffix': '.png'},
     'primary': Tr

### GEtting data for venues of all neighbors

In [387]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    LIMIT=100
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['id'], 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue Id',
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [388]:
toronto_venues = getNearbyVenues(names=toronto_data['Neighborhood'],
                                   latitudes=toronto_data['Latitude'],
                                   longitudes=toronto_data['Longitude']
                                  )

Lawrence Park
Davisville North
North Toronto West, Lawrence Park
Davisville
Moore Park, Summerhill East
Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park
Roselawn
Forest Hill North & West, Forest Hill Road Park
The Annex, North Midtown, Yorkville
Rosedale
St. James Town, Cabbagetown
Church and Wellesley
Regent Park, Harbourfront
Garden District, Ryerson
St. James Town
Berczy Park
Central Bay Street
Richmond, Adelaide, King
Harbourfront East, Union Station, Toronto Islands
Toronto Dominion Centre, Design Exchange
Commerce Court, Victoria Hotel
University of Toronto, Harbord
Kensington Market, Chinatown, Grange Park
CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport
Stn A PO Boxes
First Canadian Place, Underground city
Christie
Queen's Park, Ontario Provincial Government
The Beaches
The Danforth West, Riverdale
India Bazaar, The Beaches West
Studio District
Business reply mail Processing Centre, South Central Letter 

In [389]:
toronto_venues.to_csv('toronto_venue.csv',index=False) 

In [390]:
toronto_venues=pd.read_csv('toronto_venue.csv')
toronto_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue Id,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Lawrence Park,43.72802,-79.38879,50e6da19e4b0d8a78a0e9794,Lawrence Park Ravine,43.726963,-79.394382,Park
1,Lawrence Park,43.72802,-79.38879,5082ef77e4b0a7491cf7b022,Zodiac Swim School,43.728532,-79.38286,Swim School
2,Lawrence Park,43.72802,-79.38879,50ed9da8e4b081eabee12672,TTC Bus #162 - Lawrence-Donway,43.728026,-79.382805,Bus Line
3,Davisville North,43.712751,-79.390197,4ba011c2f964a5204a5737e3,Sherwood Park,43.716551,-79.387776,Park
4,Davisville North,43.712751,-79.390197,4e8e73c30cd6209590ae7be4,Summerhill Market North,43.715499,-79.392881,Food & Drink Shop


In [391]:
toronto_venues.shape

(2108, 8)

In [392]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue Id,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Agincourt,4,4,4,4,4,4,4
"Alderwood, Long Branch",7,7,7,7,7,7,7
"Bathurst Manor, Wilson Heights, Downsview North",20,20,20,20,20,20,20
Bayview Village,4,4,4,4,4,4,4
"Bedford Park, Lawrence Manor East",24,24,24,24,24,24,24
Berczy Park,57,57,57,57,57,57,57
"Birch Cliff, Cliffside West",5,5,5,5,5,5,5
"Brockton, Parkdale Village, Exhibition Place",22,22,22,22,22,22,22
"Business reply mail Processing Centre, South Central Letter Processing Plant Toronto",15,15,15,15,15,15,15
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",17,17,17,17,17,17,17


In [393]:
len(toronto_venues['Venue Category'].unique())

268

### One hot encoding which makes out machine learning model to work

In [394]:
toronto_onehot=pd.get_dummies(toronto_venues[['Venue Category']])
toronto_onehot['Neighborhood']=toronto_venues['Neighborhood']
fixed_columns=[toronto_onehot.columns[-1]]+list(toronto_onehot.columns[:-1])
toronto_onehot=toronto_onehot[fixed_columns]
toronto_onehot.head()

Unnamed: 0,Neighborhood,Venue Category_Accessories Store,Venue Category_Afghan Restaurant,Venue Category_Airport,Venue Category_Airport Food Court,Venue Category_Airport Gate,Venue Category_Airport Lounge,Venue Category_Airport Service,Venue Category_Airport Terminal,Venue Category_American Restaurant,...,Venue Category_Vegetarian / Vegan Restaurant,Venue Category_Video Game Store,Venue Category_Video Store,Venue Category_Vietnamese Restaurant,Venue Category_Warehouse Store,Venue Category_Wine Bar,Venue Category_Wine Shop,Venue Category_Wings Joint,Venue Category_Women's Store,Venue Category_Yoga Studio
0,Lawrence Park,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Lawrence Park,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Lawrence Park,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Davisville North,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Davisville North,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [395]:
toronto_grouped=toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Venue Category_Accessories Store,Venue Category_Afghan Restaurant,Venue Category_Airport,Venue Category_Airport Food Court,Venue Category_Airport Gate,Venue Category_Airport Lounge,Venue Category_Airport Service,Venue Category_Airport Terminal,Venue Category_American Restaurant,...,Venue Category_Vegetarian / Vegan Restaurant,Venue Category_Video Game Store,Venue Category_Video Store,Venue Category_Vietnamese Restaurant,Venue Category_Warehouse Store,Venue Category_Wine Bar,Venue Category_Wine Shop,Venue Category_Wings Joint,Venue Category_Women's Store,Venue Category_Yoga Studio
0,Agincourt,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000
1,"Alderwood, Long Branch",0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000
2,"Bathurst Manor, Wilson Heights, Downsview North",0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000
3,Bayview Village,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000
4,"Bedford Park, Lawrence Manor East",0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.041667,...,0.000000,0.000000,0.000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000
5,Berczy Park,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.017544,0.000000,0.000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000
6,"Birch Cliff, Cliffside West",0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000
7,"Brockton, Parkdale Village, Exhibition Place",0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000
8,"Business reply mail Processing Centre, South C...",0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000
9,"CN Tower, King and Spadina, Railway Lands, Har...",0.000000,0.000000,0.058824,0.058824,0.058824,0.117647,0.117647,0.117647,0.000000,...,0.000000,0.000000,0.000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000


In [396]:
toronto_grouped.shape

(96, 269)

In [397]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [398]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Venue Category_Lounge,Venue Category_Latin American Restaurant,Venue Category_Skating Rink,Venue Category_Breakfast Spot,Venue Category_Donut Shop,Venue Category_Diner,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run,Venue Category_Doner Restaurant
1,"Alderwood, Long Branch",Venue Category_Pizza Place,Venue Category_Gym,Venue Category_Coffee Shop,Venue Category_Sandwich Place,Venue Category_Pub,Venue Category_Pool,Venue Category_Drugstore,Venue Category_Donut Shop,Venue Category_Doner Restaurant,Venue Category_Deli / Bodega
2,"Bathurst Manor, Wilson Heights, Downsview North",Venue Category_Coffee Shop,Venue Category_Bank,Venue Category_Fried Chicken Joint,Venue Category_Bridal Shop,Venue Category_Sandwich Place,Venue Category_Diner,Venue Category_Restaurant,Venue Category_Deli / Bodega,Venue Category_Supermarket,Venue Category_Middle Eastern Restaurant
3,Bayview Village,Venue Category_Café,Venue Category_Bank,Venue Category_Chinese Restaurant,Venue Category_Japanese Restaurant,Venue Category_Yoga Studio,Venue Category_Diner,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run,Venue Category_Doner Restaurant
4,"Bedford Park, Lawrence Manor East",Venue Category_Coffee Shop,Venue Category_Restaurant,Venue Category_Sandwich Place,Venue Category_Italian Restaurant,Venue Category_Thai Restaurant,Venue Category_Pharmacy,Venue Category_Pizza Place,Venue Category_Pub,Venue Category_Café,Venue Category_Butcher


### Using K-Means Algorithm to cluster similar Neighborhoods

In [399]:
kclusters = 8

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[10:20] 

array([4, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [400]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster_Labels', kmeans.labels_)

toronto_merged = toronto_data

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head() # check the last columns!

Unnamed: 0,Borough,Postal Code,Neighborhood,Latitude,Longitude,Cluster_Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Central Toronto,M4N,Lawrence Park,43.72802,-79.38879,4.0,Venue Category_Park,Venue Category_Bus Line,Venue Category_Swim School,Venue Category_Yoga Studio,Venue Category_Doner Restaurant,Venue Category_Diner,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run,Venue Category_Donut Shop
1,Central Toronto,M4P,Davisville North,43.712751,-79.390197,0.0,Venue Category_Gym / Fitness Center,Venue Category_Breakfast Spot,Venue Category_Hotel,Venue Category_Food & Drink Shop,Venue Category_Department Store,Venue Category_Park,Venue Category_Sandwich Place,Venue Category_Gym,Venue Category_Airport Terminal,Venue Category_American Restaurant
2,Central Toronto,M4R,"North Toronto West, Lawrence Park",43.715383,-79.405678,0.0,Venue Category_Clothing Store,Venue Category_Coffee Shop,Venue Category_Yoga Studio,Venue Category_Sporting Goods Shop,Venue Category_Café,Venue Category_Chinese Restaurant,Venue Category_Diner,Venue Category_Fast Food Restaurant,Venue Category_Gift Shop,Venue Category_Mexican Restaurant
3,Central Toronto,M4S,Davisville,43.704324,-79.38879,0.0,Venue Category_Pizza Place,Venue Category_Sandwich Place,Venue Category_Dessert Shop,Venue Category_Coffee Shop,Venue Category_Gym,Venue Category_Italian Restaurant,Venue Category_Café,Venue Category_Sushi Restaurant,Venue Category_Brewery,Venue Category_Discount Store
4,Central Toronto,M4T,"Moore Park, Summerhill East",43.689574,-79.38316,0.0,Venue Category_Tennis Court,Venue Category_Doner Restaurant,Venue Category_Dessert Shop,Venue Category_Dim Sum Restaurant,Venue Category_Diner,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run,Venue Category_Yoga Studio,Venue Category_Department Store


In [409]:
toronto_merged['Cluster_Labels'].value_counts()

0.0    83
4.0     8
1.0     3
3.0     2
6.0     1
5.0     1
7.0     1
2.0     1
Name: Cluster_Labels, dtype: int64

In [402]:
toronto_merged.dropna(axis=0, how='any', thresh=None, subset=None, inplace=True)

In [404]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster_Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)-1],
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [410]:
neigh_data=toronto_merged.query('Cluster_Labels==4')
neigh=neigh_data['Neighborhood'].values
venues_data=toronto_venues[toronto_venues.Neighborhood.isin(neigh)]
venues_data.reset_index(inplace=True)
venues_data.drop(['index'],axis=1)
venues_data.columns = venues_data.columns.str.replace(' ', '')
venues_data['VenueCategory'].values
# venues_id=venues_data['Venue Id'].values
# venues_id.shape

array(['Park', 'Swim School', 'Bus Line', 'Playground', 'Park', 'Park',
       'Trail', 'Park', 'Park', 'Convenience Store', 'Park', 'Smoke Shop',
       'River', 'Park', 'Park', 'Playground', 'Park', "Women's Store",
       'Pool', 'Park', 'Park'], dtype=object)

In [425]:
def get_rating(category):
    venue_data=venues_data.query('VenueCategory==@category')
    venue_id_list=venue_data['VenueId'].values

    venue_rating=0
    counter=0
    if(len(venue_id_list)!=0):
        for venueid in venue_id_list:
            LIMIT=100
            radius=500
            url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format(venueid,CLIENT_ID,CLIENT_SECRET,VERSION)
            result = requests.get(url).json()

            if('rating' in result['response']['venue'].keys()):
                venue_rating+=(result['response']['venue']['rating'])
                counter+=1
    else:
        output="No found {} in selected cluster".format(category)
        return(output)
    return (venue_rating/counter)

In [426]:
venues_data['VenueCategory'].values

array(['Park', 'Swim School', 'Bus Line', 'Playground', 'Park', 'Park',
       'Trail', 'Park', 'Park', 'Convenience Store', 'Park', 'Smoke Shop',
       'River', 'Park', 'Park', 'Playground', 'Park', "Women's Store",
       'Pool', 'Park', 'Park'], dtype=object)

In [427]:
category=input("Choose a category from above mentioned categories to get rating:")
print("Average rating for {} is {}".format(category,get_rating(category)))

Choose a category from above mentioned categories to get rating:ed
Average rating for ed is No found ed in selected cluster


### Thank You!