In [124]:
# Count how many rows of dfMerge fall in each borough.
dfMerge['Borough'].value_counts()

North York          24
Downtown Toronto    19
Scarborough         17
Etobicoke           11
Central Toronto      9
West Toronto         6
East Toronto         5
York                 5
East York            5
Mississauga          1
Queen's Park         1
Name: Borough, dtype: int64

## Plotting all boroughs with folium

In [125]:
import folium 
from geopy.geocoders import Nominatim
# Geocode the city so the overview map can be centred on it.
address = 'Toronto, ON, Canadá'
# Give the geocoder an explicit, more generous timeout so a slow response
# from the Nominatim service does not abort the notebook.
geolocator = Nominatim(user_agent='tr_explorer', timeout=10)
location = geolocator.geocode(address)
# geocode() returns None when nothing matches; fail with a clear message
# instead of an AttributeError on `location.latitude`.
if location is None:
    raise ValueError(f'Could not geocode address: {address!r}')
latitude = location.latitude
longitude = location.longitude
print(latitude, longitude)

43.653963 -79.387207


In [126]:
# Base map centred on the geocoded coordinates.
torontoMap = folium.Map(location=[latitude, longitude], zoom_start=10)

# Add one small red dot per row of dfMerge; the popup shows the borough name.
for lat, long, bor in zip(dfMerge['Latitude'], dfMerge['Longitude'], dfMerge['Borough']):
    marker = folium.CircleMarker(
        [lat, long],
        radius=2,
        popup=bor,
        color='red',
        fill=True,
        fill_opacity=1,
    )
    marker.add_to(torontoMap)
torontoMap

# The North York borough was chosen for applying the k-means model

## Filtering the North York database

In [127]:
# Keep only the North York rows, renumber them from 0, and attach the
# coordinates that share the same Postcode.
northYorkData = (
    df.loc[df['Borough'].eq('North York')]
    .reset_index(drop=True)
    .merge(dfCoordenates, on='Postcode')
)
northYorkData.head()


Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M6A,North York,Lawrence Heights,43.718518,-79.464763
3,M6A,North York,Lawrence Manor,43.718518,-79.464763
4,M3B,North York,Don Mills North,43.745906,-79.352188


## Plotting all the neighborhoods of North York

In [128]:
# Geocode North York so the borough-level maps can be centred on it.
address = 'North York, ON'

# Give the geocoder an explicit, more generous timeout so a slow response
# from the Nominatim service does not abort the notebook.
geolocator = Nominatim(user_agent='toronto_explorer', timeout=10)
location = geolocator.geocode(address)
# geocode() returns None when nothing matches; fail with a clear message
# instead of an AttributeError on `location.latitude`.
if location is None:
    raise ValueError(f'Could not geocode address: {address!r}')
lat = location.latitude
long = location.longitude
print(lat, long)

43.7543263 -79.44911696639593


In [129]:
# Base map centred on the geocoded North York coordinates.
northYorkMap = folium.Map(location=[lat, long], zoom_start=11)

# Add one small green dot per neighborhood row; the popup shows its name.
for lati, longi, neigh in zip(
    northYorkData['Latitude'], northYorkData['Longitude'], northYorkData['Neighborhood']
):
    marker = folium.CircleMarker(
        [lati, longi],
        radius=2,
        popup=neigh,
        color='green',
        fill=True,
        fill_opacity=1,
    )
    marker.add_to(northYorkMap)
northYorkMap

## Requesting venues info from each neighborhood

In [130]:
import requests
import os
import warnings

# Foursquare credentials are read from the environment so that secrets are
# never stored in the notebook. Export FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET before starting the kernel.
# NOTE: the key pair that used to be hardcoded here has been published with
# the notebook and should be treated as compromised and rotated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET')
if not CLIENT_ID or not CLIENT_SECRET:
    warnings.warn(
        'FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
        'Foursquare API requests will be rejected.'
    )
VERSION = '20180605'  # value sent as the `v` (API version) query parameter
LIMIT = 100  # value sent as the `limit` query parameter of each request

In [131]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query the Foursquare "explore" endpoint around each location.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        One entry per location; they are walked in parallel.
    radius : int, default 500
        Value sent as the ``radius`` query parameter of each request.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        The frame has these columns even when no venue was returned.
        'Venue Category' is None for a venue that has no category.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status code.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']
    url = 'https://api.foursquare.com/v2/venues/explore'

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # Let requests build and encode the query string.
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }
        response = requests.get(url, params=params, timeout=30)
        # Surface API errors (bad credentials, quota, ...) here instead of
        # as a KeyError while digging through the payload below.
        response.raise_for_status()

        groups = response.json()['response'].get('groups') or []
        items = groups[0].get('items', []) if groups else []

        for item in items:
            venue = item['venue']
            # Some venues come back without any category.
            categories = venue.get('categories') or []
            category = categories[0]['name'] if categories else None
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                category))

    # Passing `columns` to the constructor keeps the schema stable even when
    # `rows` is empty (assigning .columns afterwards would raise).
    nearby_venues = pd.DataFrame(rows, columns=columns)

    return nearby_venues

In [132]:
# Fetch the venues around every North York neighborhood (default radius).
northYork_venues = getNearbyVenues(
    names=northYorkData.Neighborhood,
    latitudes=northYorkData.Latitude,
    longitudes=northYorkData.Longitude,
)

Parkwoods
Victoria Village
Lawrence Heights
Lawrence Manor
Don Mills North
Glencairn
Flemingdon Park
Don Mills South
Hillcrest Village
Bathurst Manor
Downsview North
Wilson Heights
Fairview
Henry Farm
Oriole
Northwood Park
York University
Bayview Village
CFB Toronto
Downsview East
Silver Hills
York Mills
Downsview West
Downsview
North Park
Upwood Park
Humber Summit
Newtonbrook
Willowdale
Downsview Central
Bedford Park
Lawrence Manor East
Emery
Humberlea
Willowdale South
Downsview Northwest
York Mills West
Willowdale West


In [133]:
# Preview the first five venue rows.
northYork_venues.head(5)

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.753259,-79.329656,Careful & Reliable Painting,43.752622,-79.331957,Construction & Landscaping
2,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
3,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
4,Victoria Village,43.725882,-79.315572,Tim Hortons,43.725517,-79.313103,Coffee Shop


## Preparing the data for modeling

In [134]:
# Report how many distinct venue categories were returned.
print('There are {} uniques categories.'.format(
    northYork_venues['Venue Category'].nunique(dropna=False)
))

There are 108 uniques categories.


In [135]:
# One indicator column per venue category (1 where the venue has it).
# dtype=int keeps the 0/1 representation regardless of the pandas default.
northYork_onehot = pd.get_dummies(
    northYork_venues[['Venue Category']], prefix="", prefix_sep="", dtype=int
)

# A venue category that is itself called "Neighborhood" would collide with
# the label column added below and be silently overwritten; rename it first.
northYork_onehot = northYork_onehot.rename(
    columns={'Neighborhood': 'Neighborhood (venue category)'}
)

# Put the neighborhood label in the first column. Inserting by position does
# not depend on where a newly assigned column happens to land.
northYork_onehot.insert(0, 'Neighborhood', northYork_venues['Neighborhood'])

northYork_onehot.head(3)

Unnamed: 0,Neighborhood,Accessories Store,Airport,American Restaurant,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bakery,Bank,Bar,...,Tailor Shop,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Video Game Store,Video Store,Vietnamese Restaurant,Wings Joint,Women's Store
0,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [136]:
# Average the indicator columns per neighborhood, i.e. the share of each
# venue category among that neighborhood's venues.
northYork_grouped = northYork_onehot.groupby('Neighborhood', as_index=False).mean()
northYork_grouped.head(2)

Unnamed: 0,Neighborhood,Accessories Store,Airport,American Restaurant,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bakery,Bank,Bar,...,Tailor Shop,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Video Game Store,Video Store,Vietnamese Restaurant,Wings Joint,Women's Store
0,Bathurst Manor,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0
1,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [137]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of ``row``.

    The first element of ``row`` is skipped (the caller in this notebook
    passes a row whose first entry is the neighborhood name). The remaining
    values are sorted in descending order and the index labels of the first
    ``num_top_venues`` of them are returned as an array. Fewer labels are
    returned when the row has fewer remaining entries.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

In [138]:
import numpy as np
num_top_venues = 10


def _ordinal(n):
    """Return ``n`` with its English ordinal suffix (1st, 2nd, 3rd, 4th, 11th, 21st, ...)."""
    if 10 <= n % 100 <= 20:
        suffix = 'th'
    else:
        suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(n % 10, 'th')
    return f'{n}{suffix}'


# Column headers: the neighborhood name followed by one column per rank.
# (No trailing space in the "th" headers, and no exception used as control flow.)
columns = ['Neighborhood'] + [_ordinal(rank) for rank in range(1, num_top_venues + 1)]

# Build every row first and create the frame once, instead of filling an
# empty frame cell by cell.
top_venue_rows = [
    [row['Neighborhood'], *return_most_common_venues(row, num_top_venues)]
    for _, row in northYork_grouped.iterrows()
]
neighborhoods_venues_sorted = pd.DataFrame(top_venue_rows, columns=columns)

neighborhoods_venues_sorted.head(3)

Unnamed: 0,Neighborhood,1st,2nd,3rd,4th,5th,6th,7th,8th,9th,10th
0,Bathurst Manor,Coffee Shop,Shopping Mall,Ice Cream Shop,Restaurant,Bridal Shop,Sandwich Place,Pizza Place,Diner,Pharmacy,Supermarket
1,Bayview Village,Japanese Restaurant,Chinese Restaurant,Café,Bank,Discount Store,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega
2,Bedford Park,Coffee Shop,Sandwich Place,Thai Restaurant,Italian Restaurant,Restaurant,Comfort Food Restaurant,Liquor Store,Juice Bar,Pharmacy,Pizza Place


# Cluster Neighborhoods

## Fitting the model

In [139]:
from sklearn.cluster import KMeans

# Feature matrix: the per-neighborhood category frequencies without the label column.
northYorkGroupedCLustering = northYork_grouped.drop('Neighborhood', axis=1)

kClusters = 5

# random_state fixes the centroid initialisation. n_init is spelled out
# because its default differs between scikit-learn releases, which would
# otherwise change the resulting clusters from one environment to another.
kmeans = KMeans(n_clusters=kClusters, random_state=0, n_init=10).fit(northYorkGroupedCLustering)


In [140]:
# Attach the fitted cluster label as the first column. A column left over
# from a previous run of this cell is dropped first, because insert()
# raises when the column already exists.
if 'Cluster Labels' in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted = neighborhoods_venues_sorted.drop(columns='Cluster Labels')
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# Left-join labels and ranked venue categories onto the neighborhood table.
# Neighborhoods missing from neighborhoods_venues_sorted get NaN here.
northYorkMerge = northYorkData.join(
    neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood'
)

northYorkMerge.head(2)

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st,2nd,3rd,4th,5th,6th,7th,8th,9th,10th
0,M3A,North York,Parkwoods,43.753259,-79.329656,0.0,Park,Construction & Landscaping,Food & Drink Shop,Women's Store,Diner,Coffee Shop,Comfort Food Restaurant,Concert Hall,Convenience Store,Cosmetics Shop
1,M4A,North York,Victoria Village,43.725882,-79.315572,0.0,Coffee Shop,French Restaurant,Portuguese Restaurant,Hockey Arena,Women's Store,Diner,Comfort Food Restaurant,Concert Hall,Construction & Landscaping,Convenience Store


In [166]:
# Rows without a cluster label: report how many there are, remove them in
# place, then confirm none remain.
unlabelled = northYorkMerge['Cluster Labels'].isna()
print(unlabelled.sum())
northYorkMerge.drop(index=northYorkMerge.index[unlabelled], inplace=True)
print(northYorkMerge['Cluster Labels'].isna().sum())

2
0


## Plotting the clusters

In [168]:
import matplotlib.cm as cm
import matplotlib.colors as colors

mapCluster = folium.Map([lat, long], zoom_start=10)

# One distinct colour per cluster label, evenly spaced along the rainbow colormap.
rainbow = [colors.rgb2hex(rgba) for rgba in cm.rainbow(np.linspace(0, 1, kClusters))]

for lati, longi, poi, cluster in zip(northYorkMerge['Latitude'],
                                     northYorkMerge['Longitude'],
                                     northYorkMerge['Neighborhood'],
                                     northYorkMerge['Cluster Labels']):
    # Labels can arrive as floats (e.g. 0.0); use the integer both for the
    # popup text and as a direct index into the colour list.
    cluster_id = int(cluster)
    label = folium.Popup(f'{poi} Cluster {cluster_id}', parse_html=True)
    folium.CircleMarker(
        [lati, longi],
        radius=5,
        popup=label,
        color=rainbow[cluster_id],
        fill=True,
        fill_color=rainbow[cluster_id],
        fill_opacity=1).add_to(mapCluster)
mapCluster


## Cluster 1

In [169]:
# Rows with cluster label 0, showing column 1 (Borough) plus every column
# from position 5 onwards (cluster label and ranked venue categories).
northYorkMerge[northYorkMerge['Cluster Labels'] == 0].iloc[:, [1, *range(5, northYorkMerge.shape[1])]]

Unnamed: 0,Borough,Cluster Labels,1st,2nd,3rd,4th,5th,6th,7th,8th,9th,10th
0,North York,0.0,Park,Construction & Landscaping,Food & Drink Shop,Women's Store,Diner,Coffee Shop,Comfort Food Restaurant,Concert Hall,Convenience Store,Cosmetics Shop
1,North York,0.0,Coffee Shop,French Restaurant,Portuguese Restaurant,Hockey Arena,Women's Store,Diner,Comfort Food Restaurant,Concert Hall,Construction & Landscaping,Convenience Store
2,North York,0.0,Furniture / Home Store,Accessories Store,Sporting Goods Shop,Event Space,Miscellaneous Shop,Coffee Shop,Clothing Store,Boutique,Carpet Store,Vietnamese Restaurant
3,North York,0.0,Furniture / Home Store,Accessories Store,Sporting Goods Shop,Event Space,Miscellaneous Shop,Coffee Shop,Clothing Store,Boutique,Carpet Store,Vietnamese Restaurant
4,North York,0.0,Gym / Fitness Center,Caribbean Restaurant,Café,Baseball Field,Japanese Restaurant,Women's Store,Discount Store,Concert Hall,Construction & Landscaping,Convenience Store
5,North York,0.0,Japanese Restaurant,Pizza Place,Pub,Sushi Restaurant,Park,Women's Store,Clothing Store,Coffee Shop,Comfort Food Restaurant,Concert Hall
6,North York,0.0,Asian Restaurant,Gym,Restaurant,Coffee Shop,Beer Store,Discount Store,Sporting Goods Shop,Chinese Restaurant,Concert Hall,Japanese Restaurant
7,North York,0.0,Asian Restaurant,Gym,Restaurant,Coffee Shop,Beer Store,Discount Store,Sporting Goods Shop,Chinese Restaurant,Concert Hall,Japanese Restaurant
8,North York,0.0,Golf Course,Pool,Mediterranean Restaurant,Dog Run,Dim Sum Restaurant,Coffee Shop,Comfort Food Restaurant,Concert Hall,Construction & Landscaping,Convenience Store
9,North York,0.0,Coffee Shop,Shopping Mall,Ice Cream Shop,Restaurant,Bridal Shop,Sandwich Place,Pizza Place,Diner,Pharmacy,Supermarket


## Cluster 2

In [170]:
# Rows with cluster label 1, showing column 1 (Borough) plus every column
# from position 5 onwards (cluster label and ranked venue categories).
northYorkMerge[northYorkMerge['Cluster Labels'] == 1].iloc[:, [1, *range(5, northYorkMerge.shape[1])]]

Unnamed: 0,Borough,Cluster Labels,1st,2nd,3rd,4th,5th,6th,7th,8th,9th,10th
26,North York,1.0,Empanada Restaurant,Women's Store,Discount Store,Coffee Shop,Comfort Food Restaurant,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega


## Cluster 3

In [171]:
# Rows with cluster label 2, showing column 1 (Borough) plus every column
# from position 5 onwards (cluster label and ranked venue categories).
northYorkMerge[northYorkMerge['Cluster Labels'] == 2].iloc[:, [1, *range(5, northYorkMerge.shape[1])]]

Unnamed: 0,Borough,Cluster Labels,1st,2nd,3rd,4th,5th,6th,7th,8th,9th,10th
20,North York,2.0,Cafeteria,Women's Store,Discount Store,Comfort Food Restaurant,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store
21,North York,2.0,Cafeteria,Women's Store,Discount Store,Comfort Food Restaurant,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store


## Cluster 4

In [172]:
# Rows with cluster label 3, showing column 1 (Borough) plus every column
# from position 5 onwards (cluster label and ranked venue categories).
northYorkMerge[northYorkMerge['Cluster Labels'] == 3].iloc[:, [1, *range(5, northYorkMerge.shape[1])]]

Unnamed: 0,Borough,Cluster Labels,1st,2nd,3rd,4th,5th,6th,7th,8th,9th,10th
18,North York,3.0,Park,Airport,Snack Place,Diner,Coffee Shop,Comfort Food Restaurant,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop
19,North York,3.0,Park,Airport,Snack Place,Diner,Coffee Shop,Comfort Food Restaurant,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop


## Cluster 5

In [173]:
# Rows with cluster label 4, showing column 1 (Borough) plus every column
# from position 5 onwards (cluster label and ranked venue categories).
northYorkMerge[northYorkMerge['Cluster Labels'] == 4].iloc[:, [1, *range(5, northYorkMerge.shape[1])]]

Unnamed: 0,Borough,Cluster Labels,1st,2nd,3rd,4th,5th,6th,7th,8th,9th,10th
32,North York,4.0,Baseball Field,Women's Store,Dog Run,Comfort Food Restaurant,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store
33,North York,4.0,Baseball Field,Women's Store,Dog Run,Comfort Food Restaurant,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store
